From 94dc09c865494a21d3ec65b1ab155e09a03bf650 Mon Sep 17 00:00:00 2001 From: chartl Date: Sat, 23 Jan 2010 16:13:24 +0000 Subject: [PATCH] PlinkRod now successfully instantiates on the binary ped file trio (.bed, .bim, .fam) for non-indel files. Upcoming: Test that the instantiation is correct, do it for indel-containing files. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2668 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/refdata/PlinkRodWithGenomeLoc.java | 35 ++++++++++--------- .../sting/gatk/refdata/PlinkRodTest.java | 10 ++++++ 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java index c34304c95..22acf323b 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java @@ -23,7 +23,7 @@ import net.sf.samtools.SAMFileHeader; public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements ReferenceOrderedDatum { private final Set headerEntries = new HashSet(Arrays.asList("#Family ID","Individual ID","Sex", "Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE")); - private final byte SNP_MAJOR_MODE = 0x00000001; + private final byte SNP_MAJOR_MODE = 1; private ArrayList variants; private PlinkVariantInfo currentVariant; @@ -240,10 +240,10 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements PlinkBinaryTrifecta trifecta = new PlinkBinaryTrifecta(); String absolute_path = file.getAbsolutePath(); String[] directory_tree = absolute_path.split("/"); - String file_name = directory_tree[directory_tree.length-1].split(".")[0]; + String file_name = directory_tree[directory_tree.length-1].split("\\.")[0]; StringBuilder pathBuilder = new StringBuilder(); - for ( String folder : directory_tree ) { - 
pathBuilder.append(String.format("%s/",folder)); + for ( int i = 0; i < directory_tree.length - 1; i ++ ) { + pathBuilder.append(String.format("%s/",directory_tree[i])); } String path = pathBuilder.toString(); trifecta.bedFile = new File(path+file_name+".bed"); @@ -337,20 +337,20 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements if ( snpMajorMode ) { sampleOffset = sampleOffset + 4; - while ( sampleOffset > samples.size() ) { + while ( sampleOffset > samples.size() -1 ) { snpOffset ++; sampleOffset = sampleOffset % samples.size(); } } else { snpOffset = snpOffset + 4; - while ( snpOffset > variants.size() ) { + while ( snpOffset > variants.size() -1 ) { sampleOffset ++; snpOffset = snpOffset % samples.size(); } } } else { - if ( bytesRead == 2) { + if ( bytesRead == 3) { snpMajorMode = genotype == SNP_MAJOR_MODE; } } @@ -371,21 +371,21 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements if ( major ) { sampleOffset++; - while ( sampleOffset > sampleNames.size() ) { + while ( sampleOffset > sampleNames.size()-1 ) { //using offsets for comparison; size 5 == offset 4 snpOffset++; sampleOffset = sampleOffset % sampleNames.size(); } - if ( snpOffset >= variants.size() ) { + if ( snpOffset > variants.size()-1) { // done with file; early return return; } } else { snpOffset++; - while( snpOffset > variants.size() ) { + while( snpOffset > variants.size()-1 ) { sampleOffset++; snpOffset = snpOffset % variants.size(); } - if ( sampleOffset >= sampleNames.size() ) { + if ( sampleOffset > sampleNames.size()-1 ) { // done with file; early return return; } @@ -395,10 +395,10 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements private int[] parseGenotypes(byte genotype) { int[] genotypes = new int[4]; - genotypes[0] = ( genotype & 0x00000011 ); - genotypes[1] = ( ( genotype & 0x00001100 ) >>> 2 ); - genotypes[2] = ( ( genotype & 0x00110000 ) >>> 4 ); - genotypes[3] = ( ( genotype & 
0x11000000 ) >>> 6 ); + genotypes[0] = ( genotype & 3 ); + genotypes[1] = ( ( genotype & 12 ) >>> 2 ); + genotypes[2] = ( ( genotype & 48 ) >>> 4 ); + genotypes[3] = ( ( genotype & 192 ) >>> 6 ); return genotypes; } } @@ -504,9 +504,12 @@ class PlinkVariantInfo implements Comparable { } else if (genoTYPE == 1) { alleleStr[0] = locAllele1; alleleStr[1] = locAllele2; - } else { + } else if (genoTYPE == 3 ) { alleleStr[0] = locAllele2; alleleStr[1] = locAllele2; + } else { + alleleStr[0] = "0"; + alleleStr[1] = "0"; } if ( this.isSNP() ) { diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java index c8c770486..8c1bbcd32 100755 --- a/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java @@ -152,4 +152,14 @@ public class PlinkRodTest extends BaseTest { Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases()); Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases()); } + + @Test + public void testBinaryPedFileNoIndels() { + PlinkRodWithGenomeLoc rod = new PlinkRodWithGenomeLoc("binaryTest1"); + try { + rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed")); + } catch (FileNotFoundException e) { + throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e); + } + } }