PlinkRod now successfully instantiates on the binary ped file trio (.bed, .bim, .fam) for non-indel files.

Upcoming: test that the instantiation is correct, and extend it to indel-containing files.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2668 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-01-23 16:13:24 +00:00
parent 01db93299c
commit 94dc09c865
2 changed files with 29 additions and 16 deletions

View File

@ -23,7 +23,7 @@ import net.sf.samtools.SAMFileHeader;
public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements ReferenceOrderedDatum {
private final Set<String> headerEntries = new HashSet<String>(Arrays.asList("#Family ID","Individual ID","Sex",
"Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE"));
private final byte SNP_MAJOR_MODE = 0x00000001;
private final byte SNP_MAJOR_MODE = 1;
private ArrayList<PlinkVariantInfo> variants;
private PlinkVariantInfo currentVariant;
@ -240,10 +240,10 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements
PlinkBinaryTrifecta trifecta = new PlinkBinaryTrifecta();
String absolute_path = file.getAbsolutePath();
String[] directory_tree = absolute_path.split("/");
String file_name = directory_tree[directory_tree.length-1].split(".")[0];
String file_name = directory_tree[directory_tree.length-1].split("\\.")[0];
StringBuilder pathBuilder = new StringBuilder();
for ( String folder : directory_tree ) {
pathBuilder.append(String.format("%s/",folder));
for ( int i = 0; i < directory_tree.length - 1; i ++ ) {
pathBuilder.append(String.format("%s/",directory_tree[i]));
}
String path = pathBuilder.toString();
trifecta.bedFile = new File(path+file_name+".bed");
@ -337,20 +337,20 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements
if ( snpMajorMode ) {
sampleOffset = sampleOffset + 4;
while ( sampleOffset > samples.size() ) {
while ( sampleOffset > samples.size() -1 ) {
snpOffset ++;
sampleOffset = sampleOffset % samples.size();
}
} else {
snpOffset = snpOffset + 4;
while ( snpOffset > variants.size() ) {
while ( snpOffset > variants.size() -1 ) {
sampleOffset ++;
snpOffset = snpOffset % samples.size();
}
}
} else {
if ( bytesRead == 2) {
if ( bytesRead == 3) {
snpMajorMode = genotype == SNP_MAJOR_MODE;
}
}
@ -371,21 +371,21 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements
if ( major ) {
sampleOffset++;
while ( sampleOffset > sampleNames.size() ) {
while ( sampleOffset > sampleNames.size()-1 ) { //using offsets for comparison; size 5 == offset 4
snpOffset++;
sampleOffset = sampleOffset % sampleNames.size();
}
if ( snpOffset >= variants.size() ) {
if ( snpOffset > variants.size()-1) {
// done with file; early return
return;
}
} else {
snpOffset++;
while( snpOffset > variants.size() ) {
while( snpOffset > variants.size()-1 ) {
sampleOffset++;
snpOffset = snpOffset % variants.size();
}
if ( sampleOffset >= sampleNames.size() ) {
if ( sampleOffset > sampleNames.size()-1 ) {
// done with file; early return
return;
}
@ -395,10 +395,10 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements
/**
 * Splits one byte into its four 2-bit fields.
 *
 * Field 0 is taken from the two lowest-order bits, field 3 from the two
 * highest-order bits, so each returned value is in the range 0..3.
 *
 * @param genotype the packed byte to decode
 * @return a new 4-element array holding the 2-bit fields, lowest-order first
 */
private int[] parseGenotypes(byte genotype) {
    final int fieldsPerByte = 4;
    final int bitsPerField = 2;
    final int fieldMask = 3; // binary 11: keeps exactly one 2-bit field

    int[] genotypes = new int[fieldsPerByte];
    for ( int field = 0; field < fieldsPerByte; field++ ) {
        // The byte is sign-extended to int before shifting; masking after the
        // shift discards the extension bits, so negative bytes decode correctly.
        genotypes[field] = ( genotype >>> ( field * bitsPerField ) ) & fieldMask;
    }
    return genotypes;
}
}
@ -504,9 +504,12 @@ class PlinkVariantInfo implements Comparable {
} else if (genoTYPE == 1) {
alleleStr[0] = locAllele1;
alleleStr[1] = locAllele2;
} else {
} else if (genoTYPE == 3 ) {
alleleStr[0] = locAllele2;
alleleStr[1] = locAllele2;
} else {
alleleStr[0] = "0";
alleleStr[1] = "0";
}
if ( this.isSNP() ) {

View File

@ -152,4 +152,14 @@ public class PlinkRodTest extends BaseTest {
Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases());
}
/**
 * Smoke test: initializes a rod from a binary .bed test file and passes as long as
 * initialization does not throw. No assertions are made on the loaded contents.
 * A missing test file is reported as a StingException wrapping the original cause.
 */
@Test
public void testBinaryPedFileNoIndels() {
    final PlinkRodWithGenomeLoc binaryRod = new PlinkRodWithGenomeLoc("binaryTest1");
    // NOTE(review): presumably this fixture contains no indels, per its file name -- confirm.
    final File bedFile = new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed");
    try {
        binaryRod.initialize(bedFile);
    } catch (FileNotFoundException missingFile) {
        throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",missingFile);
    }
}
}