- Multi-allelic sites are now correctly ignored - Reporting of mendelian violations enhanced - Corrected TP overflow by caping it to Bye.MAX_VALUE

-Updated integrationtests to reflect changes in MVF file output

Signed-off-by: Eric Banks <ebanks@broadinstitute.org>
This commit is contained in:
Laurent Francioli 2012-07-17 16:28:11 +02:00 committed by Eric Banks
parent 863eb5b5c0
commit 68d0e4dd6d
2 changed files with 19 additions and 18 deletions

View File

@ -342,9 +342,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
private Genotype getPhasedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, Genotype phasedGenotype){ private Genotype getPhasedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, Genotype phasedGenotype){
int phredScoreTransmission = -1; int phredScoreTransmission = -1;
if(transmissionProb != NO_TRANSMISSION_PROB) if(transmissionProb != NO_TRANSMISSION_PROB){
phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb)); double dphredScoreTransmission = MathUtils.log10ProbabilityToPhredScale(Math.log10(1-(transmissionProb)));
phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE;
}
//Handle null, missing and unavailable genotypes //Handle null, missing and unavailable genotypes
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable //Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
//genotype so it is safe to return the original genotype in this case. //genotype so it is safe to return the original genotype in this case.
@ -410,7 +411,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
buildMatrices(); buildMatrices();
if(mvFile != null) if(mvFile != null)
mvFile.println("#CHROM\tPOS\tFILTER\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_RAD\tMOTHER_AAD\tMOTHER_HRPL\tMOTHER_HETPL\tMOTHER_HAPL\tFATHER_GT\tFATHER_DP\tFATHER_RAD\tFATHER_AAD\tFATHER_HRPL\tFATHER_HETPL\tFATHER_HAPL\tCHILD_GT\tCHILD_DP\tCHILD_RAD\tCHILD_AAD\tCHILD_HRPL\tCHILD_HETPL\tCHILD_HAPL"); mvFile.println("CHROM\tPOS\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_AD\tMOTHER_PL\tFATHER_GT\tFATHER_DP\tFATHER_AD\tFATHER_PL\tCHILD_GT\tCHILD_DP\tCHILD_AD\tCHILD_PL");
} }
@ -776,7 +777,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
return metricsCounters; return metricsCounters;
final VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation()); final VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation());
if (vc == null) if (vc == null || !vc.isBiallelic())
return metricsCounters; return metricsCounters;
final VariantContextBuilder builder = new VariantContextBuilder(vc); final VariantContextBuilder builder = new VariantContextBuilder(vc);
@ -805,8 +806,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
if(father != null){ if(father != null){
genotypesContext.replace(phasedFather); genotypesContext.replace(phasedFather);
updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters); updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s", mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(), vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()), phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),
phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(), phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
phasedChild.getGenotypeString(),Arrays.asList(phasedChild.getDP()),phasedChild.getAD(),phasedChild.getLikelihoodsString()); phasedChild.getGenotypeString(),Arrays.asList(phasedChild.getDP()),phasedChild.getAD(),phasedChild.getLikelihoodsString());
@ -817,8 +818,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
updatePairMetricsCounters(phasedMother,phasedChild,mvCount,metricsCounters); updatePairMetricsCounters(phasedMother,phasedChild,mvCount,metricsCounters);
if(!(phasedMother.getType()==mother.getType() && phasedChild.getType()==child.getType())) if(!(phasedMother.getType()==mother.getType() && phasedChild.getType()==child.getType()))
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1); metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t.:.:.:.\t%s:%s:%s:%s", mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s:%s:%s:%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s",
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(), vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),phasedMother.getLikelihoodsString(), phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),phasedMother.getLikelihoodsString(),
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString()); phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
} }
@ -828,15 +829,15 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
updatePairMetricsCounters(phasedFather,phasedChild,mvCount,metricsCounters); updatePairMetricsCounters(phasedFather,phasedChild,mvCount,metricsCounters);
if(!(phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType())) if(!(phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1); metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t.:.:.:.\t%s:%s:%s:%s\t%s:%s:%s:%s", mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(), vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(), phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString()); phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
} }
//Report violation if set so //Report violation if set so
//TODO: ADAPT FOR PAIRS TOO!! //TODO: ADAPT FOR PAIRS TOO!!
if(mvCount>0 && mvFile != null) if(mvCount>0 && mvFile != null && !vc.isFiltered())
mvFile.println(mvfLine); mvFile.println(mvfLine);
} }

View File

@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s" "-o %s"
), ),
2, 2,
Arrays.asList("cd112ec37a9e28d366aff29a85fdcaa0","f8721f4f5d3bae2848ae15c3f120709b") Arrays.asList("f4b0b5471e03306ee2fad27d88b217b6","f8721f4f5d3bae2848ae15c3f120709b")
); );
executeTest("testTrueNegativeMV", spec); executeTest("testTrueNegativeMV", spec);
} }
@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s" "-o %s"
), ),
2, 2,
Arrays.asList("27ccd6feb51de7e7dcdf35f4697fa4eb","547fdfef393f3045a96d245ef6af8acb") Arrays.asList("dbc64776dcc9e01a468b61e4e0db8277","547fdfef393f3045a96d245ef6af8acb")
); );
executeTest("testTruePositiveMV", spec); executeTest("testTruePositiveMV", spec);
} }
@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s" "-o %s"
), ),
2, 2,
Arrays.asList("719d681bb0a52a40bc854bba107c5c94","9529e2bf214d72e792d93fbea22a3b91") Arrays.asList("37793e78861bb0bc070884da67dc10e6","9529e2bf214d72e792d93fbea22a3b91")
); );
executeTest("testFalsePositiveMV", spec); executeTest("testFalsePositiveMV", spec);
} }
@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s" "-o %s"
), ),
2, 2,
Arrays.asList("7f4a277aee2c7398fcfa84d6c98d5fb3","8c157d79dd00063d2932f0d2b96f53d8") Arrays.asList("e4da7639bb542d6440975da12b94973f","8c157d79dd00063d2932f0d2b96f53d8")
); );
executeTest("testSpecialCases", spec); executeTest("testSpecialCases", spec);
} }
@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s" "-o %s"
), ),
2, 2,
Arrays.asList("44e09d2f9e4d8a9488226d03a97fe999","343e418850ae4a687ebef2acd55fcb07") Arrays.asList("ab92b714471a000285577d540e1fdc2e","343e418850ae4a687ebef2acd55fcb07")
); );
executeTest("testPriorOption", spec); executeTest("testPriorOption", spec);
} }
@ -149,7 +149,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-fatherAlleleFirst" "-fatherAlleleFirst"
), ),
2, 2,
Arrays.asList("60ced3d078792a150a03640b62926857","52ffa82428e63ade22ea37b72ae58492") Arrays.asList("4b937c1b4e96602a7479b07b59254d06","52ffa82428e63ade22ea37b72ae58492")
); );
executeTest("testFatherAlleleFirst", spec); executeTest("testFatherAlleleFirst", spec);
} }