diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java index 22acf323b..bd7f96e8c 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRodWithGenomeLoc.java @@ -273,7 +273,7 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements if ( line != null ) { String[] snpInfo = line.split("\\s+"); PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1],true); - variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[2]), Long.valueOf(snpInfo[2])+1)); + variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3]))); variant.setAlleles(snpInfo[4],snpInfo[5]); variants.add(variant); } @@ -337,15 +337,15 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements if ( snpMajorMode ) { sampleOffset = sampleOffset + 4; - while ( sampleOffset > samples.size() -1 ) { + if ( sampleOffset > samples.size() -1 ) { snpOffset ++; - sampleOffset = sampleOffset % samples.size(); + sampleOffset = 0; } } else { snpOffset = snpOffset + 4; - while ( snpOffset > variants.size() -1 ) { + if ( snpOffset > variants.size() -1 ) { sampleOffset ++; - snpOffset = snpOffset % samples.size(); + snpOffset = 0; } } @@ -371,22 +371,12 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements if ( major ) { sampleOffset++; - while ( sampleOffset > sampleNames.size()-1 ) { //using offsets for comparison; size 5 == offset 4 - snpOffset++; - sampleOffset = sampleOffset % sampleNames.size(); - } - if ( snpOffset > variants.size()-1) { - // done with file; early return + if ( sampleOffset > sampleNames.size()-1 ) { //using offsets for comparison; size 5 == offset 4 return; } } else { snpOffset++; - while( snpOffset > variants.size()-1 ) { - sampleOffset++; - snpOffset = snpOffset % variants.size(); - } - if ( sampleOffset > sampleNames.size()-1 ) { - // done with file; early return + if( snpOffset > variants.size()-1 ) { return; } } @@ -444,11 +434,17 @@ class PlinkVariantInfo implements Comparable { } public void setAlleles(String al1, String al2) { - locAllele1 = al1; + if ( al1.equals("0") ) { + // encoding for a site at which no variants were detected + locAllele1 = al2; + } else { + locAllele1 = al1; + } locAllele2 = al2; if ( ! isSNP() ) { siteIndelLength = Math.max(locAllele1.length(),locAllele2.length()); } + } // CONSTRUCTOR @@ -501,7 +497,7 @@ class PlinkVariantInfo implements Comparable { if ( genoTYPE == 0 ) { alleleStr[0] = locAllele1; alleleStr[1] = locAllele1; - } else if (genoTYPE == 1) { + } else if (genoTYPE == 2) { alleleStr[0] = locAllele1; alleleStr[1] = locAllele2; } else if (genoTYPE == 3 ) { diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java index 8c1bbcd32..a384e73a9 100755 --- a/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java @@ -161,5 +161,46 @@ public class PlinkRodTest extends BaseTest { } catch (FileNotFoundException e) { throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e); } + + // iterate through the ROD and get stuff + ArrayList lociInRod = new ArrayList(); + ArrayList> genotypesInRod = new ArrayList>(); + ArrayList> samplesInRod = new ArrayList>(); + + do { + lociInRod.add(rod.getLocation()); + genotypesInRod.add(rod.getGenotypes()); + samplesInRod.add(rod.getVariantSampleNames()); + } while ( rod.parseLine(null,null) ); + + List expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713"); + + for ( int i = 0; i < expecLoc.size(); i ++ ) { + Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString()); + } + + List expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT", + "GG","GG","AG","TT","CC","CT","TG","GG","GG"); + List expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false, + false,false,false,true,false,false,true,true,false,false); + List expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000", + "NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000", + "NA12878","NA12890","NA07000"); + int snpNo = 1; + int indiv = 1; + int alleleOffset = 0; + for ( ArrayList snp : genotypesInRod ) { + for ( Genotype gen : snp ) { + String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases(); + Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+ + "(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr); + Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet()); + Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1)); + indiv++; + alleleOffset++; + } + indiv = 1; + snpNo++; + } } }