Reverting Mark's previous commit as per the open discussion. Now the eval modules check isPolymorphic() before accruing stats when appropriate. Fixed the IndelLengthHistogram module not to error out if the indel isn't simple (that would have been bad). Only integration test that needed to be updated was the tranches one based on a separate commit from Mark.

This commit is contained in:
Eric Banks 2011-09-07 15:48:06 -04:00
parent d7e355b4b6
commit aa9e32f2f1
11 changed files with 69 additions and 71 deletions

View File

@ -75,7 +75,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean evalIsGood = eval != null && eval.isVariant();
boolean evalIsGood = eval != null && eval.isPolymorphic();
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
if (compIsGood) nCompVariants++; // count the number of comp events

View File

@ -100,11 +100,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
// added in a proxy check for monomorphic status here.
// Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call.
if ( !vc1.isVariant() || (vc1.hasGenotypes() && vc1.getHomRefCount() + vc1.getNoCallCount() == vc1.getNSamples()) ) {
if ( vc1.isMonomorphic() ) {
nRefLoci++;
} else {
switch (vc1.getType()) {
case NO_VARIATION:
// shouldn't get here
break;
case SNP:
nVariantLoci++;

View File

@ -90,18 +90,19 @@ public class IndelLengthHistogram extends VariantEvaluator {
public int getComparisonOrder() { return 1; } // need only the evals
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
return vc1.toString(); // biallelic sites are output
}
if ( vc1.isIndel() ) {
if ( vc1.isIndel() && vc1.isPolymorphic() ) {
if ( ! vc1.isBiallelic() ) {
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
return vc1.toString(); // biallelic sites are output
}
// only count simple insertions/deletions, not complex indels
if ( vc1.isSimpleInsertion() ) {
indelHistogram.update(vc1.getAlternateAllele(0).length());
} else if ( vc1.isSimpleDeletion() ) {
indelHistogram.update(-vc1.getReference().length());
} else {
throw new ReviewedStingException("Indel type that is not insertion or deletion.");
}
}

View File

@ -270,7 +270,7 @@ public class IndelStatistics extends VariantEvaluator {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (eval != null ) {
if (eval != null && eval.isPolymorphic()) {
if ( indelStats == null ) {
indelStats = new IndelStats(eval);
}

View File

@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
}
}
if ( eval.isSNP() && eval.isBiallelic() && metrics != null ) {
if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) {
metrics.incrValue(eval);
}
}

View File

@ -37,77 +37,74 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
}
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) {
if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) {
return null; //no interesting sites
}
if (vc.hasGenotypes()) {
//this maps allele to a count
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
//this maps allele to a count
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
int numHetsHere = 0;
float numGenosHere = 0;
int numIndsHere = 0;
int numHetsHere = 0;
float numGenosHere = 0;
int numIndsHere = 0;
for (Genotype genotype : vc.getGenotypes().values()) {
numIndsHere++;
if (!genotype.isNoCall()) {
//increment stats for heterozygosity
if (genotype.isHet()) {
numHetsHere++;
}
for (Genotype genotype : vc.getGenotypes().values()) {
numIndsHere++;
if (!genotype.isNoCall()) {
//increment stats for heterozygosity
if (genotype.isHet()) {
numHetsHere++;
}
numGenosHere++;
//increment stats for pairwise mismatches
numGenosHere++;
//increment stats for pairwise mismatches
for (Allele allele : genotype.getAlleles()) {
if (allele.isNonNull() && allele.isCalled()) {
String alleleString = allele.toString();
alleleCounts.putIfAbsent(alleleString, 0);
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
}
for (Allele allele : genotype.getAlleles()) {
if (allele.isNonNull() && allele.isCalled()) {
String alleleString = allele.toString();
alleleCounts.putIfAbsent(alleleString, 0);
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
}
}
}
if (numGenosHere > 0) {
//only if have one called genotype at least
this.numSites++;
}
if (numGenosHere > 0) {
//only if have one called genotype at least
this.numSites++;
this.totalHet += numHetsHere / numGenosHere;
this.totalHet += numHetsHere / numGenosHere;
//compute based on num sites
float harmonicFactor = 0;
for (int i = 1; i <= numIndsHere; i++) {
harmonicFactor += 1.0 / i;
}
this.thetaRegionNumSites += 1.0 / harmonicFactor;
//compute based on num sites
float harmonicFactor = 0;
for (int i = 1; i <= numIndsHere; i++) {
harmonicFactor += 1.0 / i;
}
this.thetaRegionNumSites += 1.0 / harmonicFactor;
//now compute pairwise mismatches
float numPairwise = 0;
float numDiffs = 0;
for (String allele1 : alleleCounts.keySet()) {
int allele1Count = alleleCounts.get(allele1);
//now compute pairwise mismatches
float numPairwise = 0;
float numDiffs = 0;
for (String allele1 : alleleCounts.keySet()) {
int allele1Count = alleleCounts.get(allele1);
for (String allele2 : alleleCounts.keySet()) {
if (allele1.compareTo(allele2) < 0) {
continue;
}
if (allele1 .compareTo(allele2) == 0) {
numPairwise += allele1Count * (allele1Count - 1) * .5;
for (String allele2 : alleleCounts.keySet()) {
if (allele1.compareTo(allele2) < 0) {
continue;
}
if (allele1 .compareTo(allele2) == 0) {
numPairwise += allele1Count * (allele1Count - 1) * .5;
}
else {
int allele2Count = alleleCounts.get(allele2);
numPairwise += allele1Count * allele2Count;
numDiffs += allele1Count * allele2Count;
}
}
else {
int allele2Count = alleleCounts.get(allele2);
numPairwise += allele1Count * allele2Count;
numDiffs += allele1Count * allele2Count;
}
}
}
if (numPairwise > 0) {
this.totalAvgDiffs += numDiffs / numPairwise;
}
if (numPairwise > 0) {
this.totalAvgDiffs += numDiffs / numPairwise;
}
}

View File

@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
}
public void updateTiTv(VariantContext vc, boolean updateStandard) {
if (vc != null && vc.isSNP() && vc.isBiallelic()) {
if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) {
if (VariantContextUtils.isTransition(vc)) {
if (updateStandard) nTiInComp++;
else nTi++;

View File

@ -117,7 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
public SiteStatus calcSiteStatus(VariantContext vc) {
if ( vc == null ) return SiteStatus.NO_CALL;
if ( vc.isFiltered() ) return SiteStatus.FILTERED;
if ( ! vc.isVariant() ) return SiteStatus.MONO;
if ( vc.isMonomorphic() ) return SiteStatus.MONO;
if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphic was false and there are genotypes
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
int ac = 0;
@ -132,8 +133,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
else
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
} else if ( vc.hasGenotypes() ) {
return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO;
} else {
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
//return SiteStatus.NO_CALL; // we can't figure out what to do

View File

@ -232,7 +232,7 @@ public class VariantQualityScore extends VariantEvaluator {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
final String interesting = null;
if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
if( titvStats == null ) { titvStats = new TiTvStats(); }
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));

View File

@ -277,7 +277,7 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested samples
*/
public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values());
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values(), vc.getAlleles());
HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());

View File

@ -264,7 +264,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testTranches() {
String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6af2b9959aa1778a5b712536de453952"));
executeTestParallel("testTranches",spec);
}