Filtering optimizations are now live for UGv2. Instead of re-computing filtered bases at every locus, they are computed just once per read and stored in the read itself. Eyeballing the results on the ~600 sample set from 1kg, we cut out ~40% of the runtime! QUALs are now sometimes different from UGv1 because I noticed a bug in v1 where samples with spanning deletions only were assigned ref calls instead of no-calls which ever so slightly affects the QUAL. Not a big deal though.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4494 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4ac636e288
commit
cfb33d8e12
|
|
@ -30,6 +30,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broad.tribble.util.variantcontext.Genotype;
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
|
@ -133,7 +135,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||||
attributes.put(VCFConstants.DEPTH_KEY, contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size());
|
attributes.put(VCFConstants.DEPTH_KEY, getUnfilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()));
|
||||||
|
|
||||||
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
|
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
|
||||||
attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString());
|
attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString());
|
||||||
|
|
@ -151,7 +153,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
myAlleles.add(ref);
|
myAlleles.add(ref);
|
||||||
|
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||||
attributes.put(VCFConstants.DEPTH_KEY, contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size());
|
attributes.put(VCFConstants.DEPTH_KEY, getUnfilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()));
|
||||||
|
|
||||||
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
|
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
|
||||||
attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString());
|
attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString());
|
||||||
|
|
@ -159,7 +161,6 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
double GQ = GL.getAALikelihoods() - Math.max(GL.getABLikelihoods(), GL.getBBLikelihoods());
|
double GQ = GL.getAALikelihoods() - Math.max(GL.getABLikelihoods(), GL.getBBLikelihoods());
|
||||||
|
|
||||||
calls.put(sample, new Genotype(sample, myAlleles, GQ, null, attributes, false));
|
calls.put(sample, new Genotype(sample, myAlleles, GQ, null, attributes, false));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return calls;
|
return calls;
|
||||||
|
|
@ -187,6 +188,16 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
return ( refLikelihoodMinusEpsilon > likelihoods[1] && refLikelihoodMinusEpsilon > likelihoods[2]);
|
return ( refLikelihoodMinusEpsilon > likelihoods[1] && refLikelihoodMinusEpsilon > likelihoods[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int getUnfilteredDepth(ReadBackedPileup pileup) {
|
||||||
|
int count = 0;
|
||||||
|
for ( PileupElement p : pileup ) {
|
||||||
|
if ( DiploidSNPGenotypeLikelihoods.usableBase(p, true) )
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
protected class CalculatedAlleleFrequency {
|
protected class CalculatedAlleleFrequency {
|
||||||
|
|
||||||
public double log10PNonRef;
|
public double log10PNonRef;
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||||
|
|
@ -305,14 +306,9 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
|
|
||||||
for ( PileupElement p : pileup ) {
|
for ( PileupElement p : pileup ) {
|
||||||
// ignore deletions
|
if ( usableBase(p, ignoreBadBases) ) {
|
||||||
if ( p.isDeletion() )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
byte base = p.getBase();
|
|
||||||
if ( ! ignoreBadBases || ! badBase(base) ) {
|
|
||||||
byte qual = capBaseQualsAtMappingQual ? (byte)Math.min((int)p.getQual(), p.getMappingQual()) : p.getQual();
|
byte qual = capBaseQualsAtMappingQual ? (byte)Math.min((int)p.getQual(), p.getMappingQual()) : p.getQual();
|
||||||
n += add(base, qual, p.getRead(), p.getOffset());
|
n += add(p.getBase(), qual, p.getRead(), p.getOffset());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -452,6 +448,22 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
return fwdStrand ? 0 : 1;
|
return fwdStrand ? 0 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true when the observedBase is considered usable.
|
||||||
|
* @param p pileup element
|
||||||
|
* @param ignoreBadBases should we ignore bad bases?
|
||||||
|
* @return true if the base is a usable base
|
||||||
|
*/
|
||||||
|
protected static boolean usableBase(PileupElement p, boolean ignoreBadBases) {
|
||||||
|
// ignore deletions and filtered bases
|
||||||
|
if ( p.isDeletion() ||
|
||||||
|
(p.getRead() instanceof GATKSAMRecord &&
|
||||||
|
!((GATKSAMRecord)p.getRead()).isGoodBase(p.getOffset())) )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return ( !ignoreBadBases || !badBase(p.getBase()) );
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true when the observedBase is considered bad and shouldn't be processed by this object. A base
|
* Returns true when the observedBase is considered bad and shouldn't be processed by this object. A base
|
||||||
* is considered bad if:
|
* is considered bad if:
|
||||||
|
|
@ -461,7 +473,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
* @param observedBase observed base
|
* @param observedBase observed base
|
||||||
* @return true if the base is a bad base
|
* @return true if the base is a bad base
|
||||||
*/
|
*/
|
||||||
protected boolean badBase(byte observedBase) {
|
protected static boolean badBase(byte observedBase) {
|
||||||
return BaseUtils.simpleBaseToBaseIndex(observedBase) == -1;
|
return BaseUtils.simpleBaseToBaseIndex(observedBase) == -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
|
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods(UAC.baseModel, (DiploidSNPGenotypePriors)priors, UAC.defaultPlatform);
|
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods(UAC.baseModel, (DiploidSNPGenotypePriors)priors, UAC.defaultPlatform);
|
||||||
GL.add(pileup, true, UAC.CAP_BASE_QUALITY);
|
int nGoodBases = GL.add(pileup, true, UAC.CAP_BASE_QUALITY);
|
||||||
|
if ( nGoodBases == 0 )
|
||||||
|
continue;
|
||||||
|
|
||||||
double[] likelihoods = GL.getLikelihoods();
|
double[] likelihoods = GL.getLikelihoods();
|
||||||
double[] posteriors = GL.getPosteriors();
|
double[] posteriors = GL.getPosteriors();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -40,12 +40,18 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.pileup.*;
|
import org.broadinstitute.sting.utils.pileup.*;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecordFilter;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broad.tribble.vcf.VCFConstants;
|
import org.broad.tribble.vcf.VCFConstants;
|
||||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
|
|
||||||
public class UnifiedGenotyperEngine {
|
public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
|
|
@ -77,12 +83,17 @@ public class UnifiedGenotyperEngine {
|
||||||
private Logger logger = null;
|
private Logger logger = null;
|
||||||
private PrintStream verboseWriter = null;
|
private PrintStream verboseWriter = null;
|
||||||
|
|
||||||
|
// fasta reference reader to supplement the edges of the reference sequence for long reads
|
||||||
|
private IndexedFastaSequenceFile referenceReader;
|
||||||
|
|
||||||
// number of chromosomes (2 * samples) in input
|
// number of chromosomes (2 * samples) in input
|
||||||
private int N;
|
private int N;
|
||||||
|
|
||||||
// the standard filter to use for calls below the confidence threshold but above the emit threshold
|
// the standard filter to use for calls below the confidence threshold but above the emit threshold
|
||||||
private static final Set<String> filter = new HashSet<String>(1);
|
private static final Set<String> filter = new HashSet<String>(1);
|
||||||
|
|
||||||
|
private static final int MISMATCH_WINDOW_SIZE = 20;
|
||||||
|
|
||||||
|
|
||||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||||
// get the number of samples
|
// get the number of samples
|
||||||
|
|
@ -92,14 +103,14 @@ public class UnifiedGenotyperEngine {
|
||||||
numSamples = 1;
|
numSamples = 1;
|
||||||
else
|
else
|
||||||
numSamples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size();
|
numSamples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size();
|
||||||
initialize(UAC, null, null, null, numSamples);
|
initialize(toolkit, UAC, null, null, null, numSamples);
|
||||||
}
|
}
|
||||||
|
|
||||||
public UnifiedGenotyperEngine(UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, int numSamples) {
|
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, int numSamples) {
|
||||||
initialize(UAC, logger, verboseWriter, engine, numSamples);
|
initialize(toolkit, UAC, logger, verboseWriter, engine, numSamples);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void initialize(UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, int numSamples) {
|
private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, int numSamples) {
|
||||||
this.UAC = UAC;
|
this.UAC = UAC;
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
this.verboseWriter = verboseWriter;
|
this.verboseWriter = verboseWriter;
|
||||||
|
|
@ -111,6 +122,8 @@ public class UnifiedGenotyperEngine {
|
||||||
genotypePriors = createGenotypePriors(UAC);
|
genotypePriors = createGenotypePriors(UAC);
|
||||||
|
|
||||||
filter.add(LOW_QUAL_FILTER_NAME);
|
filter.add(LOW_QUAL_FILTER_NAME);
|
||||||
|
|
||||||
|
referenceReader = new IndexedFastaSequenceFile(toolkit.getArguments().referenceFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -130,7 +143,7 @@ public class UnifiedGenotyperEngine {
|
||||||
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
||||||
}
|
}
|
||||||
|
|
||||||
BadReadPileupFilter badReadPileupFilter = new BadReadPileupFilter(refContext);
|
BadBaseFilter badReadPileupFilter = new BadBaseFilter(refContext, UAC);
|
||||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, badReadPileupFilter);
|
Map<String, StratifiedAlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, badReadPileupFilter);
|
||||||
if ( stratifiedContexts == null )
|
if ( stratifiedContexts == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -280,7 +293,7 @@ public class UnifiedGenotyperEngine {
|
||||||
return ( d >= 0.0 && d <= 1.0 );
|
return ( d >= 0.0 && d <= 1.0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
private Map<String, StratifiedAlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, BadReadPileupFilter badReadPileupFilter) {
|
private Map<String, StratifiedAlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, BadBaseFilter badBaseFilter) {
|
||||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = null;
|
Map<String, StratifiedAlignmentContext> stratifiedContexts = null;
|
||||||
|
|
||||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.DINDEL && rawContext.hasExtendedEventPileup() ) {
|
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.DINDEL && rawContext.hasExtendedEventPileup() ) {
|
||||||
|
|
@ -290,9 +303,6 @@ public class UnifiedGenotyperEngine {
|
||||||
// filter the context based on min mapping quality
|
// filter the context based on min mapping quality
|
||||||
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
|
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||||
|
|
||||||
// filter the context based on bad mates and mismatch rate
|
|
||||||
pileup = pileup.getFilteredPileup(badReadPileupFilter);
|
|
||||||
|
|
||||||
// don't call when there is no coverage
|
// don't call when there is no coverage
|
||||||
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
|
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -306,13 +316,13 @@ public class UnifiedGenotyperEngine {
|
||||||
if ( !BaseUtils.isRegularBase(ref) )
|
if ( !BaseUtils.isRegularBase(ref) )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
ReadBackedPileup rawPileup = rawContext.getBasePileup();
|
ReadBackedPileup pileup = rawContext.getBasePileup();
|
||||||
|
|
||||||
// filter the context based on min base and mapping qualities
|
// filter the reads
|
||||||
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
filterPileup(pileup, badBaseFilter);
|
||||||
|
|
||||||
// filter the context based on bad mates and mismatch rate
|
// filter the context based on bad mates and mismatch rate
|
||||||
pileup = pileup.getFilteredPileup(badReadPileupFilter);
|
//pileup = pileup.getFilteredPileup(badReadPileupFilter);
|
||||||
|
|
||||||
// don't call when there is no coverage
|
// don't call when there is no coverage
|
||||||
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
|
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
|
||||||
|
|
@ -395,23 +405,55 @@ public class UnifiedGenotyperEngine {
|
||||||
verboseWriter.println();
|
verboseWriter.println();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private void filterPileup(ReadBackedPileup pileup, BadBaseFilter badBaseFilter) {
|
||||||
* Filters low quality reads out of the pileup.
|
for ( PileupElement p : pileup ) {
|
||||||
*/
|
final SAMRecord read = p.getRead();
|
||||||
private class BadReadPileupFilter implements PileupElementFilter {
|
if ( read instanceof GATKSAMRecord )
|
||||||
private ReferenceContext refContext;
|
((GATKSAMRecord)read).setGoodBases(badBaseFilter, true);
|
||||||
|
}
|
||||||
public BadReadPileupFilter(ReferenceContext ref) {
|
|
||||||
// create the +/-20bp window
|
|
||||||
GenomeLoc window = GenomeLocParser.createGenomeLoc(ref.getLocus().getContig(), Math.max(ref.getWindow().getStart(), ref.getLocus().getStart()-20), Math.min(ref.getWindow().getStop(), ref.getLocus().getStart()+20));
|
|
||||||
byte[] bases = new byte[41];
|
|
||||||
System.arraycopy(ref.getBases(), (int)Math.max(0, window.getStart()-ref.getWindow().getStart()), bases, 0, (int)window.size());
|
|
||||||
refContext = new ReferenceContext(ref.getLocus(), window, bases);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean allow(PileupElement pileupElement) {
|
/**
|
||||||
return ((UAC.USE_BADLY_MATED_READS || !BadMateFilter.hasBadMate(pileupElement.getRead())) &&
|
* Filters low quality bases out of the SAMRecords.
|
||||||
AlignmentUtils.mismatchesInRefWindow(pileupElement, refContext, true) <= UAC.MAX_MISMATCHES );
|
*/
|
||||||
|
private class BadBaseFilter implements GATKSAMRecordFilter {
|
||||||
|
private ReferenceContext refContext;
|
||||||
|
private final UnifiedArgumentCollection UAC;
|
||||||
|
|
||||||
|
public BadBaseFilter(ReferenceContext refContext, UnifiedArgumentCollection UAC) {
|
||||||
|
this.refContext = refContext;
|
||||||
|
this.UAC = UAC;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BitSet getGoodBases(final GATKSAMRecord record) {
|
||||||
|
// all bits are set to false by default
|
||||||
|
BitSet bitset = new BitSet(record.getReadLength());
|
||||||
|
|
||||||
|
// if the mapping quality is too low or the mate is bad, we can just zero out the whole read and continue
|
||||||
|
if ( record.getMappingQuality() < UAC.MIN_MAPPING_QUALTY_SCORE ||
|
||||||
|
(!UAC.USE_BADLY_MATED_READS && BadMateFilter.hasBadMate(record)) ) {
|
||||||
|
return bitset;
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] quals = record.getBaseQualities();
|
||||||
|
for (int i = 0; i < quals.length; i++) {
|
||||||
|
if ( quals[i] >= UAC.MIN_BASE_QUALTY_SCORE )
|
||||||
|
bitset.set(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// if a read is too long for the reference context, extend the context
|
||||||
|
if ( record.getAlignmentEnd() > refContext.getWindow().getStop() ) {
|
||||||
|
GenomeLoc window = GenomeLocParser.createGenomeLoc(refContext.getLocus().getContig(), refContext.getWindow().getStart(), record.getAlignmentEnd());
|
||||||
|
byte[] bases = referenceReader.getSubsequenceAt(window.getContig(), window.getStart(), window.getStop()).getBases();
|
||||||
|
StringUtil.toUpperCase(bases);
|
||||||
|
refContext = new ReferenceContext(refContext.getLocus(), window, bases);
|
||||||
|
}
|
||||||
|
|
||||||
|
BitSet mismatches = AlignmentUtils.mismatchesInRefWindow(record, refContext, UAC.MAX_MISMATCHES, MISMATCH_WINDOW_SIZE);
|
||||||
|
if ( mismatches != null )
|
||||||
|
bitset.and(mismatches);
|
||||||
|
|
||||||
|
return bitset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -128,7 +128,7 @@ public class UnifiedGenotyperV2 extends LocusWalker<VariantCallContext, UnifiedG
|
||||||
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
|
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
|
||||||
|
|
||||||
annotationEngine = new VariantAnnotatorEngine(getToolkit(), Arrays.asList(annotationClassesToUse), annotationsToUse);
|
annotationEngine = new VariantAnnotatorEngine(getToolkit(), Arrays.asList(annotationClassesToUse), annotationsToUse);
|
||||||
UG_engine = new UnifiedGenotyperEngine(UAC, logger, verboseWriter, annotationEngine, samples.size());
|
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples.size());
|
||||||
|
|
||||||
// initialize the header
|
// initialize the header
|
||||||
writer.writeHeader(new VCFHeader(getHeaderInfo(), samples)) ;
|
writer.writeHeader(new VCFHeader(getHeaderInfo(), samples)) ;
|
||||||
|
|
|
||||||
|
|
@ -258,18 +258,16 @@ public class AlignmentUtils {
|
||||||
int readLength = read.getReadLength();
|
int readLength = read.getReadLength();
|
||||||
BitSet mismatches = new BitSet(readLength);
|
BitSet mismatches = new BitSet(readLength);
|
||||||
|
|
||||||
// TODO -- we only care about starting from curpos
|
// it's possible we aren't starting at the beginning of a read,
|
||||||
|
// and we don't need to look at any of the previous context outside our window
|
||||||
|
// (although we do need future context)
|
||||||
|
int readStartPos = Math.max(read.getAlignmentStart(), (int)ref.getLocus().getStart() - windowSize);
|
||||||
|
int currentReadPos = read.getAlignmentStart();
|
||||||
|
|
||||||
byte[] refBases = ref.getBases();
|
byte[] refBases = ref.getBases();
|
||||||
int refIndex = read.getAlignmentStart() - (int)ref.getWindow().getStart();
|
int refIndex = readStartPos - (int)ref.getWindow().getStart();
|
||||||
// it's possible we aren't starting at the beginning of a read
|
|
||||||
int startOffset = 0;
|
|
||||||
if ( refIndex < 0 ) {
|
if ( refIndex < 0 ) {
|
||||||
startOffset = -1 * refIndex;
|
throw new IllegalStateException("When calculating mismatches, we somehow don't have enough previous reference context for read " + read.getReadName() + " at position " + ref.getLocus());
|
||||||
refIndex = 0;
|
|
||||||
|
|
||||||
// TODO -- fix me
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
byte[] readBases = read.getReadBases();
|
byte[] readBases = read.getReadBases();
|
||||||
|
|
@ -282,15 +280,21 @@ public class AlignmentUtils {
|
||||||
int cigarElementLength = ce.getLength();
|
int cigarElementLength = ce.getLength();
|
||||||
switch ( ce.getOperator() ) {
|
switch ( ce.getOperator() ) {
|
||||||
case M:
|
case M:
|
||||||
for (int j = 0; j < cigarElementLength; j++, readIndex++, refIndex++) {
|
for (int j = 0; j < cigarElementLength; j++, readIndex++) {
|
||||||
|
// skip over unwanted bases
|
||||||
|
if ( currentReadPos++ < readStartPos )
|
||||||
|
continue;
|
||||||
|
|
||||||
if ( refIndex >= refBases.length ) {
|
if ( refIndex >= refBases.length ) {
|
||||||
// TODO -- fix me
|
throw new IllegalStateException("When calculating mismatches, we somehow don't have enough trailing reference context for read " + read.getReadName() + " at position " + ref.getLocus());
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
byte refChr = refBases[refIndex];
|
byte refChr = refBases[refIndex];
|
||||||
byte readChr = readBases[readIndex];
|
byte readChr = readBases[readIndex];
|
||||||
if ( readChr != refChr )
|
if ( readChr != refChr )
|
||||||
mismatches.set(readIndex);
|
mismatches.set(readIndex);
|
||||||
|
|
||||||
|
refIndex++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case I:
|
case I:
|
||||||
|
|
@ -299,7 +303,9 @@ public class AlignmentUtils {
|
||||||
break;
|
break;
|
||||||
case D:
|
case D:
|
||||||
case N:
|
case N:
|
||||||
|
if ( currentReadPos >= readStartPos )
|
||||||
refIndex += cigarElementLength;
|
refIndex += cigarElementLength;
|
||||||
|
currentReadPos += cigarElementLength;
|
||||||
break;
|
break;
|
||||||
case H:
|
case H:
|
||||||
case P:
|
case P:
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,8 @@
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import java.lang.reflect.Method;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -18,11 +16,6 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
* if they are ever modified externally then one must also invoke the
|
* if they are ever modified externally then one must also invoke the
|
||||||
* setReadGroup() method here to ensure that the cache is kept up-to-date.
|
* setReadGroup() method here to ensure that the cache is kept up-to-date.
|
||||||
*
|
*
|
||||||
* 13 Oct 2010 - mhanna - this class is fundamentally flawed: it uses a decorator
|
|
||||||
* pattern to wrap a heavyweight object, which can lead
|
|
||||||
* to heinous side effects if the wrapping is not carefully
|
|
||||||
* done. Hopefully SAMRecord will become an interface and
|
|
||||||
* this will eventually be fixed.
|
|
||||||
*/
|
*/
|
||||||
public class GATKSAMRecord extends SAMRecord {
|
public class GATKSAMRecord extends SAMRecord {
|
||||||
|
|
||||||
|
|
@ -83,7 +76,7 @@ public class GATKSAMRecord extends SAMRecord {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isGoodBase(int index) {
|
public boolean isGoodBase(int index) {
|
||||||
return ( mBitSet == null || mBitSet.length() <= index ? true : mBitSet.get(index));
|
return ( mBitSet == null || mBitSet.size() <= index ? true : mBitSet.get(index));
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
@ -334,76 +327,6 @@ public class GATKSAMRecord extends SAMRecord {
|
||||||
|
|
||||||
public void setValidationStringency(net.sf.samtools.SAMFileReader.ValidationStringency validationStringency) { mRecord.setValidationStringency(validationStringency); }
|
public void setValidationStringency(net.sf.samtools.SAMFileReader.ValidationStringency validationStringency) { mRecord.setValidationStringency(validationStringency); }
|
||||||
|
|
||||||
public Object getAttribute(final String tag) { return mRecord.getAttribute(tag); }
|
|
||||||
|
|
||||||
public Integer getIntegerAttribute(final String tag) { return mRecord.getIntegerAttribute(tag); }
|
|
||||||
|
|
||||||
public Short getShortAttribute(final String tag) { return mRecord.getShortAttribute(tag); }
|
|
||||||
|
|
||||||
public Byte getByteAttribute(final String tag) { return mRecord.getByteAttribute(tag); }
|
|
||||||
|
|
||||||
public String getStringAttribute(final String tag) { return mRecord.getStringAttribute(tag); }
|
|
||||||
|
|
||||||
public Character getCharacterAttribute(final String tag) { return mRecord.getCharacterAttribute(tag); }
|
|
||||||
|
|
||||||
public Float getFloatAttribute(final String tag) { return mRecord.getFloatAttribute(tag); }
|
|
||||||
|
|
||||||
public byte[] getByteArrayAttribute(final String tag) { return mRecord.getByteArrayAttribute(tag); }
|
|
||||||
|
|
||||||
protected Object getAttribute(final short tag) {
|
|
||||||
Object attribute;
|
|
||||||
try {
|
|
||||||
Method method = mRecord.getClass().getDeclaredMethod("getAttribute",Short.TYPE);
|
|
||||||
method.setAccessible(true);
|
|
||||||
attribute = method.invoke(mRecord,tag);
|
|
||||||
}
|
|
||||||
catch(Exception ex) {
|
|
||||||
throw new ReviewedStingException("Unable to invoke getAttribute method",ex);
|
|
||||||
}
|
|
||||||
return attribute;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setAttribute(final String tag, final Object value) { mRecord.setAttribute(tag,value); }
|
|
||||||
|
|
||||||
protected void setAttribute(final short tag, final Object value) {
|
|
||||||
try {
|
|
||||||
Method method = mRecord.getClass().getDeclaredMethod("setAttribute",Short.TYPE,Object.class);
|
|
||||||
method.setAccessible(true);
|
|
||||||
method.invoke(mRecord,tag,value);
|
|
||||||
}
|
|
||||||
catch(Exception ex) {
|
|
||||||
throw new ReviewedStingException("Unable to invoke setAttribute method",ex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void clearAttributes() { mRecord.clearAttributes(); }
|
|
||||||
|
|
||||||
protected void setAttributes(final SAMBinaryTagAndValue attributes) {
|
|
||||||
try {
|
|
||||||
Method method = mRecord.getClass().getDeclaredMethod("setAttributes",SAMBinaryTagAndValue.class);
|
|
||||||
method.setAccessible(true);
|
|
||||||
method.invoke(mRecord,attributes);
|
|
||||||
}
|
|
||||||
catch(Exception ex) {
|
|
||||||
throw new ReviewedStingException("Unable to invoke setAttributes method",ex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected SAMBinaryTagAndValue getBinaryAttributes() {
|
|
||||||
SAMBinaryTagAndValue binaryAttributes;
|
|
||||||
try {
|
|
||||||
Method method = mRecord.getClass().getDeclaredMethod("getBinaryAttributes");
|
|
||||||
method.setAccessible(true);
|
|
||||||
binaryAttributes = (SAMBinaryTagAndValue)method.invoke(mRecord);
|
|
||||||
}
|
|
||||||
catch(Exception ex) {
|
|
||||||
throw new ReviewedStingException("Unable to invoke getBinaryAttributes method",ex);
|
|
||||||
}
|
|
||||||
return binaryAttributes;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<SAMTagAndValue> getAttributes() { return mRecord.getAttributes(); }
|
|
||||||
|
|
||||||
public SAMFileHeader getHeader() { return mRecord.getHeader(); }
|
public SAMFileHeader getHeader() { return mRecord.getHeader(); }
|
||||||
|
|
||||||
public void setHeader(SAMFileHeader samFileHeader) { mRecord.setHeader(samFileHeader); }
|
public void setHeader(SAMFileHeader samFileHeader) { mRecord.setHeader(samFileHeader); }
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue