Reverting Mark's previous commit as per the open discussion. Now the eval modules check isPolymorphic() before accruing stats when appropriate. Fixed the IndelLengthHistogram module not to error out if the indel isn't simple (that would have been bad). Only integration test that needed to be updated was the tranches one based on a separate commit from Mark.
This commit is contained in:
parent
d7e355b4b6
commit
aa9e32f2f1
|
|
@ -75,7 +75,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean evalIsGood = eval != null && eval.isVariant();
|
||||
boolean evalIsGood = eval != null && eval.isPolymorphic();
|
||||
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
|
||||
|
||||
if (compIsGood) nCompVariants++; // count the number of comp events
|
||||
|
|
|
|||
|
|
@ -100,11 +100,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
|
||||
// added in a proxy check for monomorphic status here.
|
||||
// Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call.
|
||||
if ( !vc1.isVariant() || (vc1.hasGenotypes() && vc1.getHomRefCount() + vc1.getNoCallCount() == vc1.getNSamples()) ) {
|
||||
if ( vc1.isMonomorphic() ) {
|
||||
nRefLoci++;
|
||||
} else {
|
||||
switch (vc1.getType()) {
|
||||
case NO_VARIATION:
|
||||
// shouldn't get here
|
||||
break;
|
||||
case SNP:
|
||||
nVariantLoci++;
|
||||
|
|
|
|||
|
|
@ -90,18 +90,19 @@ public class IndelLengthHistogram extends VariantEvaluator {
|
|||
public int getComparisonOrder() { return 1; } // need only the evals
|
||||
|
||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
|
||||
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
|
||||
return vc1.toString(); // biallelic sites are output
|
||||
}
|
||||
|
||||
if ( vc1.isIndel() ) {
|
||||
if ( vc1.isIndel() && vc1.isPolymorphic() ) {
|
||||
|
||||
if ( ! vc1.isBiallelic() ) {
|
||||
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
|
||||
return vc1.toString(); // biallelic sites are output
|
||||
}
|
||||
|
||||
// only count simple insertions/deletions, not complex indels
|
||||
if ( vc1.isSimpleInsertion() ) {
|
||||
indelHistogram.update(vc1.getAlternateAllele(0).length());
|
||||
} else if ( vc1.isSimpleDeletion() ) {
|
||||
indelHistogram.update(-vc1.getReference().length());
|
||||
} else {
|
||||
throw new ReviewedStingException("Indel type that is not insertion or deletion.");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -270,7 +270,7 @@ public class IndelStatistics extends VariantEvaluator {
|
|||
|
||||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
if (eval != null ) {
|
||||
if (eval != null && eval.isPolymorphic()) {
|
||||
if ( indelStats == null ) {
|
||||
indelStats = new IndelStats(eval);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
|
|||
}
|
||||
}
|
||||
|
||||
if ( eval.isSNP() && eval.isBiallelic() && metrics != null ) {
|
||||
if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) {
|
||||
metrics.incrValue(eval);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,77 +37,74 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) {
|
||||
if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) {
|
||||
return null; //no interesting sites
|
||||
}
|
||||
|
||||
if (vc.hasGenotypes()) {
|
||||
//this maps allele to a count
|
||||
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
|
||||
|
||||
//this maps allele to a count
|
||||
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
|
||||
int numHetsHere = 0;
|
||||
float numGenosHere = 0;
|
||||
int numIndsHere = 0;
|
||||
|
||||
int numHetsHere = 0;
|
||||
float numGenosHere = 0;
|
||||
int numIndsHere = 0;
|
||||
for (Genotype genotype : vc.getGenotypes().values()) {
|
||||
numIndsHere++;
|
||||
if (!genotype.isNoCall()) {
|
||||
//increment stats for heterozygosity
|
||||
if (genotype.isHet()) {
|
||||
numHetsHere++;
|
||||
}
|
||||
|
||||
for (Genotype genotype : vc.getGenotypes().values()) {
|
||||
numIndsHere++;
|
||||
if (!genotype.isNoCall()) {
|
||||
//increment stats for heterozygosity
|
||||
if (genotype.isHet()) {
|
||||
numHetsHere++;
|
||||
}
|
||||
numGenosHere++;
|
||||
//increment stats for pairwise mismatches
|
||||
|
||||
numGenosHere++;
|
||||
//increment stats for pairwise mismatches
|
||||
|
||||
for (Allele allele : genotype.getAlleles()) {
|
||||
if (allele.isNonNull() && allele.isCalled()) {
|
||||
String alleleString = allele.toString();
|
||||
alleleCounts.putIfAbsent(alleleString, 0);
|
||||
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
||||
}
|
||||
for (Allele allele : genotype.getAlleles()) {
|
||||
if (allele.isNonNull() && allele.isCalled()) {
|
||||
String alleleString = allele.toString();
|
||||
alleleCounts.putIfAbsent(alleleString, 0);
|
||||
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (numGenosHere > 0) {
|
||||
//only if have one called genotype at least
|
||||
this.numSites++;
|
||||
}
|
||||
if (numGenosHere > 0) {
|
||||
//only if have one called genotype at least
|
||||
this.numSites++;
|
||||
|
||||
this.totalHet += numHetsHere / numGenosHere;
|
||||
this.totalHet += numHetsHere / numGenosHere;
|
||||
|
||||
//compute based on num sites
|
||||
float harmonicFactor = 0;
|
||||
for (int i = 1; i <= numIndsHere; i++) {
|
||||
harmonicFactor += 1.0 / i;
|
||||
}
|
||||
this.thetaRegionNumSites += 1.0 / harmonicFactor;
|
||||
//compute based on num sites
|
||||
float harmonicFactor = 0;
|
||||
for (int i = 1; i <= numIndsHere; i++) {
|
||||
harmonicFactor += 1.0 / i;
|
||||
}
|
||||
this.thetaRegionNumSites += 1.0 / harmonicFactor;
|
||||
|
||||
//now compute pairwise mismatches
|
||||
float numPairwise = 0;
|
||||
float numDiffs = 0;
|
||||
for (String allele1 : alleleCounts.keySet()) {
|
||||
int allele1Count = alleleCounts.get(allele1);
|
||||
//now compute pairwise mismatches
|
||||
float numPairwise = 0;
|
||||
float numDiffs = 0;
|
||||
for (String allele1 : alleleCounts.keySet()) {
|
||||
int allele1Count = alleleCounts.get(allele1);
|
||||
|
||||
for (String allele2 : alleleCounts.keySet()) {
|
||||
if (allele1.compareTo(allele2) < 0) {
|
||||
continue;
|
||||
}
|
||||
if (allele1 .compareTo(allele2) == 0) {
|
||||
numPairwise += allele1Count * (allele1Count - 1) * .5;
|
||||
for (String allele2 : alleleCounts.keySet()) {
|
||||
if (allele1.compareTo(allele2) < 0) {
|
||||
continue;
|
||||
}
|
||||
if (allele1 .compareTo(allele2) == 0) {
|
||||
numPairwise += allele1Count * (allele1Count - 1) * .5;
|
||||
|
||||
}
|
||||
else {
|
||||
int allele2Count = alleleCounts.get(allele2);
|
||||
numPairwise += allele1Count * allele2Count;
|
||||
numDiffs += allele1Count * allele2Count;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int allele2Count = alleleCounts.get(allele2);
|
||||
numPairwise += allele1Count * allele2Count;
|
||||
numDiffs += allele1Count * allele2Count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (numPairwise > 0) {
|
||||
this.totalAvgDiffs += numDiffs / numPairwise;
|
||||
}
|
||||
if (numPairwise > 0) {
|
||||
this.totalAvgDiffs += numDiffs / numPairwise;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
}
|
||||
|
||||
public void updateTiTv(VariantContext vc, boolean updateStandard) {
|
||||
if (vc != null && vc.isSNP() && vc.isBiallelic()) {
|
||||
if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) {
|
||||
if (VariantContextUtils.isTransition(vc)) {
|
||||
if (updateStandard) nTiInComp++;
|
||||
else nTi++;
|
||||
|
|
|
|||
|
|
@ -117,7 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
public SiteStatus calcSiteStatus(VariantContext vc) {
|
||||
if ( vc == null ) return SiteStatus.NO_CALL;
|
||||
if ( vc.isFiltered() ) return SiteStatus.FILTERED;
|
||||
if ( ! vc.isVariant() ) return SiteStatus.MONO;
|
||||
if ( vc.isMonomorphic() ) return SiteStatus.MONO;
|
||||
if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphic was false and there are genotypes
|
||||
|
||||
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||
int ac = 0;
|
||||
|
|
@ -132,8 +133,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
else
|
||||
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
|
||||
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
|
||||
} else if ( vc.hasGenotypes() ) {
|
||||
return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO;
|
||||
} else {
|
||||
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
|
||||
//return SiteStatus.NO_CALL; // we can't figure out what to do
|
||||
|
|
|
|||
|
|
@ -232,7 +232,7 @@ public class VariantQualityScore extends VariantEvaluator {
|
|||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
final String interesting = null;
|
||||
|
||||
if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||
if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||
if( titvStats == null ) { titvStats = new TiTvStats(); }
|
||||
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
||||
|
||||
|
|
|
|||
|
|
@ -277,7 +277,7 @@ public class VariantEvalUtils {
|
|||
* @return a new VariantContext with just the requested samples
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
|
||||
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values());
|
||||
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values(), vc.getAlleles());
|
||||
|
||||
HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());
|
||||
|
||||
|
|
|
|||
|
|
@ -264,7 +264,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTranches() {
|
||||
String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6af2b9959aa1778a5b712536de453952"));
|
||||
executeTestParallel("testTranches",spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue