Removing dependence on extended events for the remaining Variant Annotator modules.

This commit is contained in:
Eric Banks 2012-03-30 09:05:26 -04:00
parent b21889812d
commit b467cd1dae
11 changed files with 33 additions and 294 deletions

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -67,18 +68,19 @@ public class AlleleBalance extends InfoFieldAnnotation {
continue;
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
if ( context == null )
if ( context == null || !context.hasBasePileup() )
continue;
if ( vc.isSNP() && context.hasBasePileup() ) {
final String bases = new String(context.getBasePileup().getBases());
final ReadBackedPileup pileup = context.getBasePileup();
if ( vc.isSNP() ) {
final String bases = new String(pileup.getBases());
if ( bases.length() == 0 )
return null;
char refChr = vc.getReference().toString().charAt(0);
char altChr = vc.getAlternateAllele(0).toString().charAt(0);
final char refChr = vc.getReference().toString().charAt(0);
final char altChr = vc.getAlternateAllele(0).toString().charAt(0);
int refCount = MathUtils.countOccurrences(refChr, bases);
int altCount = MathUtils.countOccurrences(altChr, bases);
final int refCount = MathUtils.countOccurrences(refChr, bases);
final int altCount = MathUtils.countOccurrences(altChr, bases);
// sanity check
if ( refCount + altCount == 0 )
@ -87,22 +89,10 @@ public class AlleleBalance extends InfoFieldAnnotation {
// weight the allele balance by genotype quality so that e.g. mis-called homs don't affect the ratio too much
ratio += genotype.getLog10PError() * ((double)refCount / (double)(refCount + altCount));
totalWeights += genotype.getLog10PError();
} else if ( vc.isIndel() && context.hasExtendedEventPileup() ) {
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
if ( indelPileup == null ) {
continue;
}
// todo -- actually care about indel length from the pileup (agnostic at the moment)
int refCount = indelPileup.getNumberOfElements();
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
if ( refCount + altCount == 0 ) {
continue;
}
ratio += /* todo -- make not uniform */ 1 * ((double) refCount) / (double) (refCount + altCount);
totalWeights += 1;
}
// Allele Balance for indels was not being computed correctly (since there was no allele matching). Instead of
// prolonging the life of imperfect code, I've decided to delete it. If someone else wants to try again from
// scratch, be my guest - but make sure it's done correctly! [EB]
}
// make sure we had a het genotype

View File

@ -245,24 +245,16 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
for ( String sample : stratifiedContexts.keySet() ) {
final AlignmentContext context = stratifiedContexts.get(sample);
if ( context == null )
if ( context == null || !context.hasBasePileup() )
continue;
ReadBackedPileup pileup = null;
if (context.hasExtendedEventPileup())
pileup = context.getExtendedEventPileup();
else if (context.hasBasePileup())
pileup = context.getBasePileup();
if (pileup == null)
continue;
for (final PileupElement p: pileup) {
final ReadBackedPileup pileup = context.getBasePileup();
for ( final PileupElement p : pileup ) {
if ( p.getRead().isReducedRead() ) // ignore reduced reads
continue;
if ( p.getRead().getMappingQuality() < 20)
if ( p.getRead().getMappingQuality() < 20 )
continue;
if (indelLikelihoodMap.containsKey(p)) {
if ( indelLikelihoodMap.containsKey(p) ) {
// to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element.
// A pileup element then has a list of pairs of form (Allele, likelihood of this allele).
// To classify a pileup element as Ref or Alt, we look at the likelihood of corresponding alleles.

View File

@ -30,14 +30,9 @@ public class MappingQualityZero extends InfoFieldAnnotation implements StandardA
int mq0 = 0;
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
AlignmentContext context = sample.getValue();
ReadBackedPileup pileup = null;
if (context.hasExtendedEventPileup())
pileup = context.getExtendedEventPileup();
else if (context.hasBasePileup())
pileup = context.getBasePileup();
if (pileup != null) {
final AlignmentContext context = sample.getValue();
if ( context.hasBasePileup() ) {
final ReadBackedPileup pileup = context.getBasePileup();
for (PileupElement p : pileup ) {
if ( p.getMappingQual() == 0 )
mq0++;

View File

@ -53,14 +53,8 @@ public class MappingQualityZeroBySample extends GenotypeAnnotation {
return null;
int mq0 = 0;
ReadBackedPileup pileup = null;
if (vc.isIndel() && context.hasExtendedEventPileup())
pileup = context.getExtendedEventPileup();
else if (context.hasBasePileup())
pileup = context.getBasePileup();
else return null;
if (pileup != null) {
if ( context.hasBasePileup() ) {
final ReadBackedPileup pileup = context.getBasePileup();
for (PileupElement p : pileup ) {
if ( p.getMappingQual() == 0 )
mq0++;

View File

@ -31,13 +31,8 @@ public class MappingQualityZeroFraction extends InfoFieldAnnotation implements E
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
AlignmentContext context = sample.getValue();
depth += context.size();
ReadBackedPileup pileup = null;
if (context.hasExtendedEventPileup())
pileup = context.getExtendedEventPileup();
else if (context.hasBasePileup())
pileup = context.getBasePileup();
if (pileup != null) {
if ( context.hasBasePileup() ) {
final ReadBackedPileup pileup = context.getBasePileup();
for (PileupElement p : pileup ) {
if ( p.getMappingQual() == 0 )
mq0++;

View File

@ -45,7 +45,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
if ( context == null )
continue;
depth += context.hasBasePileup() ? context.getBasePileup().depthOfCoverage() : context.getExtendedEventPileup().depthOfCoverage();
depth += context.hasBasePileup() ? context.getBasePileup().depthOfCoverage() : 0;
}
if ( depth == 0 )

View File

@ -39,13 +39,8 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
AlignmentContext context = sample.getValue();
ReadBackedPileup pileup = null;
if (context.hasExtendedEventPileup())
pileup = context.getExtendedEventPileup();
else if (context.hasBasePileup())
pileup = context.getBasePileup();
if (pileup != null) {
if ( context.hasBasePileup() ) {
final ReadBackedPileup pileup = context.getBasePileup();
for (PileupElement p : pileup ) {
if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
qualities[index++] = p.getMappingQual();

View File

@ -1,207 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Unsupported
*/
@Hidden
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
private static String REF_ALLELE = "REF";
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref,
AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() )
return null;
if ( vc.isSNP() )
return annotateSNP(stratifiedContext, vc);
if ( vc.isIndel() )
return annotateIndel(stratifiedContext, vc);
return null;
}
private Map<String,Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {
if ( ! stratifiedContext.hasBasePileup() ) return null;
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : vc.getAlternateAlleles() )
alleleCounts.put(allele.getBases()[0], 0);
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
int totalDepth = pileup.getNumberOfElements();
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!!
int mq0 = 0; // number of "ref" reads that are acually mq0
for ( PileupElement p : pileup ) {
if ( p.getMappingQual() == 0 ) {
mq0++;
continue;
}
if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt
alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
}
if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do
// we need to add counts in the correct order
String[] fracs = new String[alleleCounts.size()];
for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0));
}
map.put(getKeyNames().get(1), fracs);
return map;
}
private Map<String,Object> annotateIndel(AlignmentContext
stratifiedContext, VariantContext
vc) {
if ( ! stratifiedContext.hasExtendedEventPileup() ) {
return null;
}
ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
if ( pileup == null )
return null;
int totalDepth = pileup.getNumberOfElements();
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
if ( totalDepth == 0 ) return map;
int mq0 = 0; // number of "ref" reads that are acually mq0
HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
Allele refAllele = vc.getReference();
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( allele.isNoCall() ) {
continue; // this does not look so good, should we die???
}
alleleCounts.put(getAlleleRepresentation(allele), 0);
}
for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) {
if ( e.getMappingQual() == 0 ) {
mq0++;
continue;
}
if ( e.isInsertion() ) {
final String b = e.getEventBases();
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
}
} else {
if ( e.isDeletion() ) {
if ( e.getEventLength() == refAllele.length() ) {
// this is indeed the deletion allele recorded in VC
final String b = DEL;
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
}
}
// else {
// System.out.print(" deletion of WRONG length found");
// }
}
}
}
if ( mq0 == totalDepth ) return map;
String[] fracs = new String[alleleCounts.size()];
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
fracs[i] = String.format("%.3f",
((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0));
map.put(getKeyNames().get(1), fracs);
//map.put(getKeyNames().get(0), counts);
return map;
}
private String getAlleleRepresentation(Allele allele) {
if ( allele.isNull() ) { // deletion wrt the ref
return DEL;
} else { // insertion, pass actual bases
return allele.getBaseString();
}
}
// public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList("DP","FA"); }
public List<VCFFormatHeaderLine> getDescriptions() {
return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0),
1,
VCFHeaderLineType.Integer,
"Total read depth per sample, including MQ0"),
new VCFFormatHeaderLine(getKeyNames().get(1),
VCFHeaderLineCount.UNBOUNDED,
VCFHeaderLineType.Float,
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
}
}

View File

@ -35,13 +35,8 @@ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAn
int depth = 0;
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
AlignmentContext context = sample.getValue();
ReadBackedPileup pileup = null;
if (context.hasExtendedEventPileup())
pileup = context.getExtendedEventPileup();
else if (context.hasBasePileup())
pileup = context.getBasePileup();
if (pileup != null) {
if ( context.hasBasePileup() ) {
final ReadBackedPileup pileup = context.getBasePileup();
deletions += pileup.getNumberOfDeletions();
depth += pileup.getNumberOfElements();
}

View File

@ -39,15 +39,9 @@ public class TechnologyComposition extends InfoFieldAnnotation implements Experi
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
AlignmentContext context = sample.getValue();
ReadBackedPileup pileup = null;
if (context.hasExtendedEventPileup())
pileup = context.getExtendedEventPileup();
else if (context.hasBasePileup())
pileup = context.getBasePileup();
if (pileup != null) {
for (PileupElement p : pileup ) {
if ( context.hasBasePileup() ) {
final ReadBackedPileup pileup = context.getBasePileup();
for ( PileupElement p : pileup ) {
if(ReadUtils.is454Read(p.getRead()))
reads454++;
else if (ReadUtils.isSOLiDRead(p.getRead()))

View File

@ -304,12 +304,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
// if the reference base is not ambiguous, we can annotate
Map<String, AlignmentContext> stratifiedContexts;
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
if ( ! context.hasExtendedEventPileup() ) {
if ( context.hasBasePileup() ) {
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
} else {
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup());
}
if ( stratifiedContexts != null ) {
annotatedVCs = new ArrayList<VariantContext>(VCs.size());
for ( VariantContext vc : VCs )
annotatedVCs.add(engine.annotateContext(tracker, ref, stratifiedContexts, vc));