PlinkRod now correctly parses binary files without indels; unit test added for this behavior.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2669 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
94dc09c865
commit
ae22d35212
|
|
@ -273,7 +273,7 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements
|
|||
if ( line != null ) {
|
||||
String[] snpInfo = line.split("\\s+");
|
||||
PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1],true);
|
||||
variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[2]), Long.valueOf(snpInfo[2])+1));
|
||||
variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3])));
|
||||
variant.setAlleles(snpInfo[4],snpInfo[5]);
|
||||
variants.add(variant);
|
||||
}
|
||||
|
|
@ -337,15 +337,15 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements
|
|||
|
||||
if ( snpMajorMode ) {
|
||||
sampleOffset = sampleOffset + 4;
|
||||
while ( sampleOffset > samples.size() -1 ) {
|
||||
if ( sampleOffset > samples.size() -1 ) {
|
||||
snpOffset ++;
|
||||
sampleOffset = sampleOffset % samples.size();
|
||||
sampleOffset = 0;
|
||||
}
|
||||
} else {
|
||||
snpOffset = snpOffset + 4;
|
||||
while ( snpOffset > variants.size() -1 ) {
|
||||
if ( snpOffset > variants.size() -1 ) {
|
||||
sampleOffset ++;
|
||||
snpOffset = snpOffset % samples.size();
|
||||
snpOffset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -371,22 +371,12 @@ public class PlinkRodWithGenomeLoc extends BasicReferenceOrderedDatum implements
|
|||
|
||||
if ( major ) {
|
||||
sampleOffset++;
|
||||
while ( sampleOffset > sampleNames.size()-1 ) { //using offsets for comparison; size 5 == offset 4
|
||||
snpOffset++;
|
||||
sampleOffset = sampleOffset % sampleNames.size();
|
||||
}
|
||||
if ( snpOffset > variants.size()-1) {
|
||||
// done with file; early return
|
||||
if ( sampleOffset > sampleNames.size()-1 ) { //using offsets for comparison; size 5 == offset 4
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
snpOffset++;
|
||||
while( snpOffset > variants.size()-1 ) {
|
||||
sampleOffset++;
|
||||
snpOffset = snpOffset % variants.size();
|
||||
}
|
||||
if ( sampleOffset > sampleNames.size()-1 ) {
|
||||
// done with file; early return
|
||||
if( snpOffset > variants.size()-1 ) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -444,11 +434,17 @@ class PlinkVariantInfo implements Comparable {
|
|||
}
|
||||
|
||||
public void setAlleles(String al1, String al2) {
|
||||
locAllele1 = al1;
|
||||
if ( al1.equals("0") ) {
|
||||
// encoding for a site at which no variants were detected
|
||||
locAllele1 = al2;
|
||||
} else {
|
||||
locAllele1 = al1;
|
||||
}
|
||||
locAllele2 = al2;
|
||||
if ( ! isSNP() ) {
|
||||
siteIndelLength = Math.max(locAllele1.length(),locAllele2.length());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// CONSTRUCTOR
|
||||
|
|
@ -501,7 +497,7 @@ class PlinkVariantInfo implements Comparable {
|
|||
if ( genoTYPE == 0 ) {
|
||||
alleleStr[0] = locAllele1;
|
||||
alleleStr[1] = locAllele1;
|
||||
} else if (genoTYPE == 1) {
|
||||
} else if (genoTYPE == 2) {
|
||||
alleleStr[0] = locAllele1;
|
||||
alleleStr[1] = locAllele2;
|
||||
} else if (genoTYPE == 3 ) {
|
||||
|
|
|
|||
|
|
@ -161,5 +161,46 @@ public class PlinkRodTest extends BaseTest {
|
|||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
|
||||
}
|
||||
|
||||
// iterate through the ROD and get stuff
|
||||
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
||||
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||
ArrayList<ArrayList<String>> samplesInRod = new ArrayList<ArrayList<String>>();
|
||||
|
||||
do {
|
||||
lociInRod.add(rod.getLocation());
|
||||
genotypesInRod.add(rod.getGenotypes());
|
||||
samplesInRod.add(rod.getVariantSampleNames());
|
||||
} while ( rod.parseLine(null,null) );
|
||||
|
||||
List<String> expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
|
||||
|
||||
for ( int i = 0; i < expecLoc.size(); i ++ ) {
|
||||
Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
|
||||
}
|
||||
|
||||
List<String> expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
|
||||
"GG","GG","AG","TT","CC","CT","TG","GG","GG");
|
||||
List<Boolean> expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
|
||||
false,false,false,true,false,false,true,true,false,false);
|
||||
List<String> expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
|
||||
"NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
|
||||
"NA12878","NA12890","NA07000");
|
||||
int snpNo = 1;
|
||||
int indiv = 1;
|
||||
int alleleOffset = 0;
|
||||
for ( ArrayList<Genotype> snp : genotypesInRod ) {
|
||||
for ( Genotype gen : snp ) {
|
||||
String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
|
||||
Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+
|
||||
"(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr);
|
||||
Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet());
|
||||
Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1));
|
||||
indiv++;
|
||||
alleleOffset++;
|
||||
}
|
||||
indiv = 1;
|
||||
snpNo++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue