PlinkRod now successfully instantiates on the binary ped file trio (.bed, .bim, .fam) for non-indel files.
Upcoming: test that the instantiation is correct, and do the same for indel-containing files. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2668 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
01db93299c
commit
94dc09c865
|
|
@ -23,7 +23,7 @@ import net.sf.samtools.SAMFileHeader;
|
|||
public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements ReferenceOrderedDatum {
|
||||
private final Set<String> headerEntries = new HashSet<String>(Arrays.asList("#Family ID","Individual ID","Sex",
|
||||
"Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE"));
|
||||
private final byte SNP_MAJOR_MODE = 0x00000001;
|
||||
private final byte SNP_MAJOR_MODE = 1;
|
||||
|
||||
private ArrayList<PlinkVariantInfo> variants;
|
||||
private PlinkVariantInfo currentVariant;
|
||||
|
|
@ -240,10 +240,10 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements
|
|||
PlinkBinaryTrifecta trifecta = new PlinkBinaryTrifecta();
|
||||
String absolute_path = file.getAbsolutePath();
|
||||
String[] directory_tree = absolute_path.split("/");
|
||||
String file_name = directory_tree[directory_tree.length-1].split(".")[0];
|
||||
String file_name = directory_tree[directory_tree.length-1].split("\\.")[0];
|
||||
StringBuilder pathBuilder = new StringBuilder();
|
||||
for ( String folder : directory_tree ) {
|
||||
pathBuilder.append(String.format("%s/",folder));
|
||||
for ( int i = 0; i < directory_tree.length - 1; i ++ ) {
|
||||
pathBuilder.append(String.format("%s/",directory_tree[i]));
|
||||
}
|
||||
String path = pathBuilder.toString();
|
||||
trifecta.bedFile = new File(path+file_name+".bed");
|
||||
|
|
@ -337,20 +337,20 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements
|
|||
|
||||
if ( snpMajorMode ) {
|
||||
sampleOffset = sampleOffset + 4;
|
||||
while ( sampleOffset > samples.size() ) {
|
||||
while ( sampleOffset > samples.size() -1 ) {
|
||||
snpOffset ++;
|
||||
sampleOffset = sampleOffset % samples.size();
|
||||
}
|
||||
} else {
|
||||
snpOffset = snpOffset + 4;
|
||||
while ( snpOffset > variants.size() ) {
|
||||
while ( snpOffset > variants.size() -1 ) {
|
||||
sampleOffset ++;
|
||||
snpOffset = snpOffset % samples.size();
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
if ( bytesRead == 2) {
|
||||
if ( bytesRead == 3) {
|
||||
snpMajorMode = genotype == SNP_MAJOR_MODE;
|
||||
}
|
||||
}
|
||||
|
|
@ -371,21 +371,21 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements
|
|||
|
||||
if ( major ) {
|
||||
sampleOffset++;
|
||||
while ( sampleOffset > sampleNames.size() ) {
|
||||
while ( sampleOffset > sampleNames.size()-1 ) { //using offsets for comparison; size 5 == offset 4
|
||||
snpOffset++;
|
||||
sampleOffset = sampleOffset % sampleNames.size();
|
||||
}
|
||||
if ( snpOffset >= variants.size() ) {
|
||||
if ( snpOffset > variants.size()-1) {
|
||||
// done with file; early return
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
snpOffset++;
|
||||
while( snpOffset > variants.size() ) {
|
||||
while( snpOffset > variants.size()-1 ) {
|
||||
sampleOffset++;
|
||||
snpOffset = snpOffset % variants.size();
|
||||
}
|
||||
if ( sampleOffset >= sampleNames.size() ) {
|
||||
if ( sampleOffset > sampleNames.size()-1 ) {
|
||||
// done with file; early return
|
||||
return;
|
||||
}
|
||||
|
|
@ -395,10 +395,10 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements
|
|||
|
||||
private int[] parseGenotypes(byte genotype) {
|
||||
int[] genotypes = new int[4];
|
||||
genotypes[0] = ( genotype & 0x00000011 );
|
||||
genotypes[1] = ( ( genotype & 0x00001100 ) >>> 2 );
|
||||
genotypes[2] = ( ( genotype & 0x00110000 ) >>> 4 );
|
||||
genotypes[3] = ( ( genotype & 0x11000000 ) >>> 6 );
|
||||
genotypes[0] = ( genotype & 3 );
|
||||
genotypes[1] = ( ( genotype & 12 ) >>> 2 );
|
||||
genotypes[2] = ( ( genotype & 48 ) >>> 4 );
|
||||
genotypes[3] = ( ( genotype & 192 ) >>> 6 );
|
||||
return genotypes;
|
||||
}
|
||||
}
|
||||
|
|
@ -504,9 +504,12 @@ class PlinkVariantInfo implements Comparable {
|
|||
} else if (genoTYPE == 1) {
|
||||
alleleStr[0] = locAllele1;
|
||||
alleleStr[1] = locAllele2;
|
||||
} else {
|
||||
} else if (genoTYPE == 3 ) {
|
||||
alleleStr[0] = locAllele2;
|
||||
alleleStr[1] = locAllele2;
|
||||
} else {
|
||||
alleleStr[0] = "0";
|
||||
alleleStr[1] = "0";
|
||||
}
|
||||
|
||||
if ( this.isSNP() ) {
|
||||
|
|
|
|||
|
|
@ -152,4 +152,14 @@ public class PlinkRodTest extends BaseTest {
|
|||
Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
|
||||
Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBinaryPedFileNoIndels() {
|
||||
PlinkRodWithGenomeLoc rod = new PlinkRodWithGenomeLoc("binaryTest1");
|
||||
try {
|
||||
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue