UnifiedGenotyper now BAQs via ADD_TAG to facilitate using BAQed quals for GL calculations but unBAQed quals for annotation calculations. UnifiedGenotyper now produces SNP and indel calls simultaneously. 40 base mismatch intrinsic filter removed from UG to greatly simplify the code. RankSumTests are now standard annotations but the integration tests are commented out pending changes that will allow random annotations to work.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5585 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1aa4083352
commit
3f3f35dea0
|
|
@ -380,7 +380,7 @@ public class VariantContextUtils {
|
|||
return new VariantContext(master.getSource(), master.getChr(), master.getStart(), master.getEnd(), master.getAlleles(), genotypes, master.getNegLog10PError(), master.getFilters(), masterAttributes);
|
||||
}
|
||||
|
||||
private static final VariantContext findMaster(Collection<VariantContext> unsortedVCs, String masterName) {
|
||||
private static VariantContext findMaster(Collection<VariantContext> unsortedVCs, String masterName) {
|
||||
for (VariantContext vc : unsortedVCs) {
|
||||
if (vc.getSource().equals(masterName)) {
|
||||
return vc;
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ import java.util.List;
|
|||
import java.util.Arrays;
|
||||
|
||||
|
||||
public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class DepthOfCoverage implements InfoFieldAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ import org.broadinstitute.sting.utils.pileup.*;
|
|||
import java.util.*;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||
private final static boolean DEBUG = false;
|
||||
|
|
@ -49,12 +48,6 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
|||
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
|
||||
private final static char REGEXP_WILDCARD = '.';
|
||||
|
||||
public boolean useRead(PileupElement p) {
|
||||
//return true; // Use all reads
|
||||
//return !ReadUtils.is454Read(p.getRead()); // Use all non-454 reads
|
||||
return p.getOffset() == -1 || ((GATKSAMRecord)p.getRead()).isGoodBase(p.getOffset()); // Use all reads from the filtered context
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( !vc.isBiallelic() || stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
||||
return null;
|
||||
|
|
@ -84,18 +77,16 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
|||
for ( final Map.Entry<String, Genotype> genotype : genotypes ) {
|
||||
final AlignmentContext thisContext = stratifiedContexts.get(genotype.getKey());
|
||||
if ( thisContext != null ) {
|
||||
final AlignmentContext aContext = thisContext;
|
||||
final ReadBackedPileup thisPileup;
|
||||
if (aContext.hasExtendedEventPileup())
|
||||
thisPileup = aContext.getExtendedEventPileup();
|
||||
else if (aContext.hasBasePileup())
|
||||
thisPileup = aContext.getBasePileup();
|
||||
if (thisContext.hasExtendedEventPileup())
|
||||
thisPileup = thisContext.getExtendedEventPileup();
|
||||
else if (thisContext.hasBasePileup())
|
||||
thisPileup = thisContext.getBasePileup();
|
||||
else
|
||||
thisPileup = null;
|
||||
|
||||
if (thisPileup != null) {
|
||||
double thisScore = scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus);
|
||||
scoreRA.add(thisScore); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
scoreRA.add( scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus) ); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -125,10 +116,8 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
|||
final PriorityQueue<Haplotype> consensusHaplotypeQueue = new PriorityQueue<Haplotype>(MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER, new HaplotypeComparator());
|
||||
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( useRead(p) ) {
|
||||
final Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize, locus);
|
||||
candidateHaplotypeQueue.add(haplotypeFromRead);
|
||||
}
|
||||
final Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize, locus);
|
||||
candidateHaplotypeQueue.add(haplotypeFromRead);
|
||||
}
|
||||
|
||||
// Now that priority queue has been built with all reads at context, we need to merge and find possible segregating haplotypes
|
||||
|
|
@ -174,12 +163,9 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
|||
private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) {
|
||||
final SAMRecord read = p.getRead();
|
||||
int readOffsetFromPileup = p.getOffset();
|
||||
|
||||
final byte[] haplotypeBases = new byte[contextSize];
|
||||
|
||||
for(int i=0; i < contextSize; i++) {
|
||||
haplotypeBases[i] = (byte)REGEXP_WILDCARD;
|
||||
}
|
||||
|
||||
Arrays.fill(haplotypeBases, (byte)REGEXP_WILDCARD);
|
||||
final double[] baseQualities = new double[contextSize];
|
||||
Arrays.fill(baseQualities, 0.0);
|
||||
|
||||
|
|
@ -200,8 +186,9 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
|||
break;
|
||||
}
|
||||
if( readQuals[baseOffset] == PileupElement.DELETION_BASE) { readQuals[baseOffset] = PileupElement.DELETION_QUAL; }
|
||||
if( readBases[baseOffset] == BaseUtils.N ) { readBases[baseOffset] = (byte)REGEXP_WILDCARD; readQuals[baseOffset] = (byte) 4; } // N's shouldn't be treated as distinct bases
|
||||
if( ((double)readQuals[baseOffset]) < 4.0 ) { readQuals[baseOffset] = (byte) 4; } // quals less than 4 are used as codes and don't have actual probabilistic meaning behind them
|
||||
if( !BaseUtils.isRegularBase(readBases[baseOffset]) ) { readBases[baseOffset] = (byte)REGEXP_WILDCARD; readQuals[baseOffset] = (byte) 0; } // N's shouldn't be treated as distinct bases
|
||||
readQuals[baseOffset] = (byte)Math.min((int)readQuals[baseOffset], p.getMappingQual());
|
||||
if( ((int)readQuals[baseOffset]) < 5 ) { readQuals[baseOffset] = (byte) 0; } // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them
|
||||
haplotypeBases[i] = readBases[baseOffset];
|
||||
baseQualities[i] = (double)readQuals[baseOffset];
|
||||
}
|
||||
|
|
@ -256,8 +243,8 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
|||
|
||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
||||
private double scoreReadsAgainstHaplotypes(final List<Haplotype> haplotypes, final ReadBackedPileup pileup, final int contextSize, final int locus) {
|
||||
// if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(0));
|
||||
// if ( DEBUG ) System.out.printf("HAP2: %s%n", haplotypes.get(1));
|
||||
if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(0));
|
||||
if ( DEBUG ) System.out.printf("HAP2: %s%n", haplotypes.get(1));
|
||||
|
||||
final ArrayList<double[]> haplotypeScores = new ArrayList<double[]>();
|
||||
for ( final PileupElement p : pileup ) {
|
||||
|
|
@ -317,27 +304,23 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
|||
final byte haplotypeBase = haplotypeBases[i];
|
||||
final byte readBase = readBases[baseOffset];
|
||||
|
||||
|
||||
final boolean matched = (readBase == haplotypeBase); // Purposefully counting wildcards in the chosen haplotype as a mismatch
|
||||
final boolean matched = ( readBase == haplotypeBase || haplotypeBase == (byte)REGEXP_WILDCARD );
|
||||
byte qual = readQuals[baseOffset];
|
||||
if( qual == PileupElement.DELETION_BASE ) { qual = PileupElement.DELETION_QUAL; }
|
||||
if( ((double) qual) < 4.0 ) { qual = (byte) 4; } // quals less than 4 are used as codes and don't have actual probabilistic meaning behind them
|
||||
final double e = QualityUtils.qualToErrorProb(qual);
|
||||
expected += e;
|
||||
mismatches += matched ? e : 1.0 - e / 3.0;
|
||||
if( qual == PileupElement.DELETION_BASE ) { qual = PileupElement.DELETION_QUAL; } // calcAlignmentByteArrayOffset fills the readQuals array with DELETION_BASE at deletions
|
||||
qual = (byte)Math.min((int)qual, p.getMappingQual());
|
||||
if( ((int) qual) >= 5 ) { // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them
|
||||
final double e = QualityUtils.qualToErrorProb(qual);
|
||||
expected += e;
|
||||
mismatches += matched ? e : 1.0 - e / 3.0;
|
||||
}
|
||||
|
||||
// a more sophisticated calculation would include the reference quality, but it's nice to actually penalize
|
||||
// the mismatching of poorly determined regions of the consensus
|
||||
|
||||
// if ( DEBUG ) {
|
||||
// System.out.printf("Read %s: scoring %c vs. %c => e = %f Q%d esum %f vs. msum %f%n",
|
||||
// read.getReadName(), (char)haplotypeBase, (char)readBase, e, readQuals[baseOffset], expected, mismatches);
|
||||
// }
|
||||
}
|
||||
|
||||
return mismatches - expected;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("HaplotypeScore"); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with two (and only two) segregating haplotypes")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes")); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -10,7 +9,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -28,7 +26,6 @@ public class QualByDepth extends AnnotationByDepth implements InfoFieldAnnotatio
|
|||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
double qual = 0.0;
|
||||
int depth = 0;
|
||||
|
||||
for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
|
||||
|
|
@ -42,47 +39,21 @@ public class QualByDepth extends AnnotationByDepth implements InfoFieldAnnotatio
|
|||
continue;
|
||||
|
||||
depth += context.size();
|
||||
|
||||
if ( genotype.getValue().hasLikelihoods() ) {
|
||||
GenotypeLikelihoods GLs = genotype.getValue().getLikelihoods();
|
||||
qual += 10.0 * getQual(GLs.getAsVector());
|
||||
}
|
||||
}
|
||||
|
||||
if ( depth == 0 )
|
||||
return null;
|
||||
|
||||
if ( qual == 0.0 )
|
||||
qual = 10.0 * vc.getNegLog10PError();
|
||||
|
||||
double sumGLbyD = qual / (double)depth;
|
||||
|
||||
int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
|
||||
double QD = 10.0 * vc.getNegLog10PError() / (double)qDepth;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", QD));
|
||||
map.put(getKeyNames().get(1), String.format("%.2f", sumGLbyD));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("QD", "sumGLbyD"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList("QD"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
|
||||
|
||||
private double getQual(double[] GLs) {
|
||||
if ( GLs == null )
|
||||
return 0.0;
|
||||
|
||||
// normalize so that we don't have precision issues
|
||||
double[] adjustedLikelihoods = new double[GLs.length];
|
||||
double maxValue = Utils.findMaxEntry(GLs);
|
||||
for (int i = 0; i < GLs.length; i++)
|
||||
adjustedLikelihoods[i] = GLs[i] - maxValue;
|
||||
|
||||
// AB + BB (in real space)
|
||||
double variantWeight = Math.pow(10, adjustedLikelihoods[1]) + Math.pow(10, adjustedLikelihoods[2]);
|
||||
// (AB + BB) / AA (in log space)
|
||||
return Math.log10(variantWeight) - adjustedLikelihoods[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -17,7 +17,7 @@ import java.util.Map;
|
|||
import java.util.HashMap;
|
||||
|
||||
|
||||
public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAnnotation {
|
||||
public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnotation {
|
||||
private static final double minPValue = 1e-20;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
|
|
|||
|
|
@ -204,7 +204,7 @@ public class VariantAnnotatorEngine {
|
|||
rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
||||
else if (vc.isIndel())
|
||||
rsID = DbSNPHelper.rsIDOfFirstRealIndel(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
||||
infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID == null ? false : true);
|
||||
infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null );
|
||||
// annotate dbsnp id if available and not already there
|
||||
if ( rsID != null && (!vc.hasID() || vc.getID().equals(VCFConstants.EMPTY_ID_FIELD)) )
|
||||
infoAnnotations.put(VariantContext.ID_KEY, rsID);
|
||||
|
|
@ -216,7 +216,7 @@ public class VariantAnnotatorEngine {
|
|||
break;
|
||||
}
|
||||
}
|
||||
infoAnnotations.put(dbSet.getValue(), overlapsComp ? true : false);
|
||||
infoAnnotations.put(dbSet.getValue(), overlapsComp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -121,6 +122,9 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
|
|||
|
||||
for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) {
|
||||
SAMRecord read = p.getRead();
|
||||
if(ReadUtils.is454Read(read)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (DEBUG && p.isIndel()) {
|
||||
System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
|
||||
|
|
@ -271,7 +275,8 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
|
|||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Map<String, BiallelicGenotypeLikelihoods> GLs,
|
||||
Allele alternateAlleleToUse) {
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup) {
|
||||
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@ import net.sf.samtools.SAMUtils;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.FragmentPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
|
|
@ -244,10 +243,6 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
|||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
public int add(ReadBackedPileup pileup) {
|
||||
return add(pileup, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates likelihoods and posteriors to reflect the additional observations contained within the
|
||||
* read-based pileup up by calling add(observedBase, qualityScore) for each base / qual in the
|
||||
|
|
@ -256,33 +251,34 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
|||
* @param pileup read pileup
|
||||
* @param ignoreBadBases should we ignore bad bases?
|
||||
* @param capBaseQualsAtMappingQual should we cap a base's quality by its read's mapping quality?
|
||||
* @param minBaseQual the minimum base quality at which to consider a base valid
|
||||
* @return the number of good bases found in the pileup
|
||||
*/
|
||||
public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
|
||||
public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
|
||||
int n = 0;
|
||||
|
||||
// for each fragment, add to the likelihoods
|
||||
FragmentPileup fpile = new FragmentPileup(pileup);
|
||||
|
||||
for ( PileupElement p : fpile.getOneReadPileup() )
|
||||
n += add(p, ignoreBadBases, capBaseQualsAtMappingQual);
|
||||
n += add(p, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
|
||||
|
||||
for ( FragmentPileup.TwoReadPileupElement twoRead : fpile.getTwoReadPileup() )
|
||||
n += add(twoRead, ignoreBadBases, capBaseQualsAtMappingQual);
|
||||
n += add(twoRead, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
|
||||
|
||||
return n;
|
||||
}
|
||||
public int add(PileupElement elt, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
|
||||
public int add(PileupElement elt, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
|
||||
byte obsBase = elt.getBase();
|
||||
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual);
|
||||
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
|
||||
return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0) : 0;
|
||||
}
|
||||
|
||||
public int add(FragmentPileup.TwoReadPileupElement twoRead, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
|
||||
public int add(FragmentPileup.TwoReadPileupElement twoRead, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
|
||||
final byte observedBase1 = twoRead.getFirst().getBase();
|
||||
final byte qualityScore1 = qualToUse(twoRead.getFirst(), ignoreBadBases, capBaseQualsAtMappingQual);
|
||||
final byte qualityScore1 = qualToUse(twoRead.getFirst(), ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
|
||||
final byte observedBase2 = twoRead.getSecond().getBase();
|
||||
final byte qualityScore2 = qualToUse(twoRead.getSecond(), ignoreBadBases, capBaseQualsAtMappingQual);
|
||||
final byte qualityScore2 = qualToUse(twoRead.getSecond(), ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
|
||||
|
||||
if ( qualityScore1 == 0 ) {
|
||||
if ( qualityScore2 == 0 ) // abort early if we didn't see any good bases
|
||||
|
|
@ -492,10 +488,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
|||
* @param p
|
||||
* @param ignoreBadBases
|
||||
* @param capBaseQualsAtMappingQual
|
||||
* @param minBaseQual
|
||||
* @return
|
||||
*/
|
||||
private final static byte qualToUse(PileupElement p, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
|
||||
if ( ! usableBase(p, ignoreBadBases) ) {
|
||||
private static byte qualToUse(PileupElement p, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
|
||||
if ( ignoreBadBases && !BaseUtils.isRegularBase( p.getBase() ) ) {
|
||||
return 0;
|
||||
} else {
|
||||
byte qual = p.getQual();
|
||||
|
|
@ -504,6 +501,8 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
|||
throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
|
||||
if ( capBaseQualsAtMappingQual )
|
||||
qual = (byte)Math.min((int)p.getQual(), p.getMappingQual());
|
||||
if ( (int)qual < minBaseQual )
|
||||
qual = (byte)0;
|
||||
|
||||
return qual;
|
||||
}
|
||||
|
|
@ -517,36 +516,6 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
|||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns true when the observedBase is considered usable.
|
||||
* @param p pileup element
|
||||
* @param ignoreBadBases should we ignore bad bases?
|
||||
* @return true if the base is a usable base
|
||||
*/
|
||||
protected static boolean usableBase(PileupElement p, boolean ignoreBadBases) {
|
||||
// ignore deletions, Q0 bases, and filtered bases
|
||||
if ( p.isDeletion() ||
|
||||
p.getQual() == 0 ||
|
||||
(p.getRead() instanceof GATKSAMRecord &&
|
||||
!((GATKSAMRecord)p.getRead()).isGoodBase(p.getOffset())) )
|
||||
return false;
|
||||
|
||||
return ( !ignoreBadBases || !badBase(p.getBase()) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true when the observedBase is considered bad and shouldn't be processed by this object. A base
|
||||
* is considered bad if:
|
||||
*
|
||||
* Criterion 1: observed base isn't a A,C,T,G or lower case equivalent
|
||||
*
|
||||
* @param observedBase observed base
|
||||
* @return true if the base is a bad base
|
||||
*/
|
||||
protected static boolean badBase(byte observedBase) {
|
||||
return BaseUtils.simpleBaseToBaseIndex(observedBase) == -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a string representation of this object in a moderately usable form
|
||||
*
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
|
|
@ -44,7 +45,8 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
|
||||
public enum Model {
|
||||
SNP,
|
||||
DINDEL
|
||||
INDEL,
|
||||
BOTH
|
||||
}
|
||||
|
||||
public enum GENOTYPING_MODE {
|
||||
|
|
@ -67,6 +69,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
*
|
||||
* @param tracker rod data
|
||||
* @param ref reference context
|
||||
* @param contexts stratified alignment contexts
|
||||
|
|
@ -75,6 +78,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
* @param GLs hash of sample->GL to fill in
|
||||
* @param alternateAlleleToUse the alternate allele to use, null if not set
|
||||
*
|
||||
* @param useBAQedPileup
|
||||
* @return genotype likelihoods per sample for AA, AB, BB
|
||||
*/
|
||||
public abstract Allele getLikelihoods(RefMetaDataTracker tracker,
|
||||
|
|
@ -83,12 +87,12 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Map<String, BiallelicGenotypeLikelihoods> GLs,
|
||||
Allele alternateAlleleToUse);
|
||||
Allele alternateAlleleToUse, boolean useBAQedPileup);
|
||||
|
||||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||
int count = 0;
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( DiploidSNPGenotypeLikelihoods.usableBase(p, true) )
|
||||
if ( BaseUtils.isRegularBase( p.getBase() ) )
|
||||
count++;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -37,7 +38,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -59,7 +60,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Map<String, BiallelicGenotypeLikelihoods> GLs,
|
||||
Allele alternateAlleleToUse) {
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup) {
|
||||
|
||||
if ( !(priors instanceof DiploidSNPGenotypePriors) )
|
||||
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
|
||||
|
|
@ -101,10 +103,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||
if( useBAQedPileup ) { pileup = createBAQedPileup( pileup ); }
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
|
||||
int nGoodBases = GL.add(pileup, true, true);
|
||||
int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
|
||||
if ( nGoodBases == 0 )
|
||||
continue;
|
||||
|
||||
|
|
@ -130,16 +133,16 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
// calculate the sum of quality scores for each base
|
||||
ReadBackedPileup pileup = sample.getValue().getBasePileup();
|
||||
ReadBackedPileup pileup = createBAQedPileup( sample.getValue().getBasePileup() );
|
||||
for ( PileupElement p : pileup ) {
|
||||
// ignore deletions and filtered bases
|
||||
if ( p.isDeletion() ||
|
||||
(p.getRead() instanceof GATKSAMRecord && !((GATKSAMRecord)p.getRead()).isGoodBase(p.getOffset())) )
|
||||
// ignore deletions
|
||||
if ( p.isDeletion() || p.getQual() < UAC.MIN_BASE_QUALTY_SCORE )
|
||||
continue;
|
||||
|
||||
int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
|
||||
if ( index >= 0 )
|
||||
final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
|
||||
if ( index >= 0 ) {
|
||||
qualCounts[index] += p.getQual();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -156,4 +159,23 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) {
|
||||
final List<PileupElement> BAQedElements = new ArrayList<PileupElement>();
|
||||
for( final PileupElement PE : pileup ) {
|
||||
final PileupElement newPE = new BAQedPileupElement( PE );
|
||||
BAQedElements.add( newPE );
|
||||
}
|
||||
return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements );
|
||||
}
|
||||
|
||||
public class BAQedPileupElement extends PileupElement {
|
||||
public BAQedPileupElement( final PileupElement PE ) {
|
||||
super(PE.getRead(), PE.getOffset());
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); }
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -45,6 +46,7 @@ import java.util.*;
|
|||
* Run this as you would the UnifiedGenotyper, except that you must additionally pass in a VCF bound to
|
||||
* the name 'allele' so we know which alternate allele to use at each site.
|
||||
*/
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
@Requires(value={},referenceMetaData=@RMD(name="allele", type= VariantContext.class))
|
||||
@Reference(window=@Window(start=-200,stop=200))
|
||||
@By(DataSource.READS)
|
||||
|
|
@ -64,7 +66,7 @@ public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer>
|
|||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
// enable extended events for indels
|
||||
public boolean generateExtendedEvents() { return UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.DINDEL; }
|
||||
public boolean generateExtendedEvents() { return UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP; }
|
||||
|
||||
public void initialize() {
|
||||
// get all of the unique sample names
|
||||
|
|
@ -101,7 +103,7 @@ public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer>
|
|||
return null;
|
||||
}
|
||||
|
||||
return new VariantCallContext(UG_engine.calculateLikelihoods(tracker, refContext, rawContext, vc.getAlternateAllele(0)), refContext.getBase(), true);
|
||||
return new VariantCallContext(UG_engine.calculateLikelihoods(tracker, refContext, rawContext, vc.getAlternateAllele(0), true), refContext.getBase(), true);
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
|
|
|||
|
|
@ -32,8 +32,8 @@ import org.broadinstitute.sting.commandline.Hidden;
|
|||
public class UnifiedArgumentCollection {
|
||||
|
||||
// control the various models to be used
|
||||
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while DINDEL is also available for calling indels.", required = false)
|
||||
public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- BOTH is the default option, while INDEL is also available for calling indels and SNP is available for calling SNPs only", required = false)
|
||||
public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH;
|
||||
|
||||
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false)
|
||||
public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
|
||||
|
|
@ -78,12 +78,6 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
|
||||
public int MIN_MAPPING_QUALTY_SCORE = 20;
|
||||
|
||||
@Argument(fullName = "max_mismatches_in_40bp_window", shortName = "mm40", doc = "Maximum number of mismatches within a 40 bp window (20bp on either side) around the target position for a read to be used for calling", required = false)
|
||||
public int MAX_MISMATCHES = 3;
|
||||
|
||||
@Argument(fullName = "use_reads_with_bad_mates", shortName = "bad_mates", doc = "Use reads whose mates are mapped excessively far away for calling", required = false)
|
||||
public boolean USE_BADLY_MATED_READS = false;
|
||||
|
||||
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
|
||||
public Double MAX_DELETION_FRACTION = 0.05;
|
||||
|
||||
|
|
@ -146,10 +140,13 @@ public class UnifiedArgumentCollection {
|
|||
private Boolean GENOTYPE_DEPRECATED = false;
|
||||
|
||||
|
||||
// Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
|
||||
public UnifiedArgumentCollection clone() {
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
|
||||
uac.GLmodel = GLmodel;
|
||||
uac.AFmodel = AFmodel;
|
||||
uac.EXACT_CALCULATION_TYPE = EXACT_CALCULATION_TYPE;
|
||||
uac.heterozygosity = heterozygosity;
|
||||
uac.PCR_error = PCR_error;
|
||||
uac.GenotypingMode = GenotypingMode;
|
||||
|
|
@ -160,8 +157,6 @@ public class UnifiedArgumentCollection {
|
|||
uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
|
||||
uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
|
||||
uac.MIN_MAPPING_QUALTY_SCORE = MIN_MAPPING_QUALTY_SCORE;
|
||||
uac.MAX_MISMATCHES = MAX_MISMATCHES;
|
||||
uac.USE_BADLY_MATED_READS = USE_BADLY_MATED_READS;
|
||||
uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION;
|
||||
uac.MIN_INDEL_COUNT_FOR_GENOTYPING = MIN_INDEL_COUNT_FOR_GENOTYPING;
|
||||
uac.INDEL_HETEROZYGOSITY = INDEL_HETEROZYGOSITY;
|
||||
|
|
@ -170,7 +165,6 @@ public class UnifiedArgumentCollection {
|
|||
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
|
||||
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
|
||||
uac.DO_CONTEXT_DEPENDENT_PENALTIES = DO_CONTEXT_DEPENDENT_PENALTIES;
|
||||
|
||||
|
||||
// todo- arguments to remove
|
||||
uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT;
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -46,15 +47,13 @@ import java.io.PrintStream;
|
|||
* A variant caller which unifies the approaches of several disparate callers. Works for single-sample and
|
||||
* multi-sample data. The user can choose from several different incorporated calculation models.
|
||||
*/
|
||||
// todo -- change when UG is generalized to do BAQ as necessary
|
||||
//@BAQMode(QualityMode = BAQ.QualityMode.DONT_MODIFY, ApplicationTime = BAQ.ApplicationTime.HANDLED_IN_WALKER)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
@ReadFilters( {BadMateFilter.class} )
|
||||
@Reference(window=@Window(start=-200,stop=200))
|
||||
@By(DataSource.REFERENCE)
|
||||
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
|
||||
public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGenotyper.UGStatistics> implements TreeReducible<UnifiedGenotyper.UGStatistics> {
|
||||
|
||||
|
||||
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
|
||||
// control the output
|
||||
|
|
@ -83,7 +82,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
// enable extended events for indels
|
||||
public boolean generateExtendedEvents() { return UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.DINDEL; }
|
||||
public boolean generateExtendedEvents() { return UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP; }
|
||||
|
||||
/**
|
||||
* Inner class for collecting output statistics from the UG
|
||||
|
|
|
|||
|
|
@ -32,30 +32,17 @@ import org.broad.tribble.util.variantcontext.Genotype;
|
|||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecordFilter;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
|
||||
|
||||
public class UnifiedGenotyperEngine {
|
||||
|
||||
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
|
||||
|
|
@ -73,19 +60,21 @@ public class UnifiedGenotyperEngine {
|
|||
private VariantAnnotatorEngine annotationEngine;
|
||||
|
||||
// the model used for calculating genotypes
|
||||
private ThreadLocal<GenotypeLikelihoodsCalculationModel> glcm = new ThreadLocal<GenotypeLikelihoodsCalculationModel>();
|
||||
private ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>>();
|
||||
|
||||
// the model used for calculating p(non-ref)
|
||||
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
|
||||
|
||||
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
|
||||
private double[] log10AlleleFrequencyPriors;
|
||||
private double[] log10AlleleFrequencyPriorsSNPs;
|
||||
private double[] log10AlleleFrequencyPriorsIndels;
|
||||
|
||||
// the allele frequency likelihoods (allocated once as an optimization)
|
||||
private ThreadLocal<double[]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[]>();
|
||||
|
||||
// the priors object
|
||||
private GenotypePriors genotypePriors;
|
||||
private GenotypePriors genotypePriorsSNPs;
|
||||
private GenotypePriors genotypePriorsIndels;
|
||||
|
||||
// samples in input
|
||||
private Set<String> samples = new TreeSet<String>();
|
||||
|
|
@ -94,18 +83,13 @@ public class UnifiedGenotyperEngine {
|
|||
private Logger logger = null;
|
||||
private PrintStream verboseWriter = null;
|
||||
|
||||
// fasta reference reader to supplement the edges of the reference sequence for long reads
|
||||
private IndexedFastaSequenceFile referenceReader;
|
||||
|
||||
// number of chromosomes (2 * samples) in input
|
||||
private int N;
|
||||
|
||||
// the standard filter to use for calls below the confidence threshold but above the emit threshold
|
||||
private static final Set<String> filter = new HashSet<String>(1);
|
||||
|
||||
private static final int MISMATCH_WINDOW_SIZE = 20;
|
||||
|
||||
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||
// get the number of samples
|
||||
// if we're supposed to assume a single sample, do so
|
||||
|
|
@ -115,15 +99,15 @@ public class UnifiedGenotyperEngine {
|
|||
else
|
||||
samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
|
||||
|
||||
initialize(toolkit, UAC, null, null, null, samples.size());
|
||||
initialize(UAC, null, null, null, samples.size());
|
||||
}
|
||||
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) {
|
||||
this.samples = new TreeSet<String>(samples);
|
||||
initialize(toolkit, UAC, logger, verboseWriter, engine, samples.size());
|
||||
initialize(UAC, logger, verboseWriter, engine, samples.size());
|
||||
}
|
||||
|
||||
private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, int numSamples) {
|
||||
private void initialize(UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, int numSamples) {
|
||||
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
|
||||
this.UAC = UAC.clone();
|
||||
this.UAC.MIN_MAPPING_QUALTY_SCORE = Math.max(UAC.MIN_MAPPING_QUALTY_SCORE, UAC.MIN_BASE_QUALTY_SCORE);
|
||||
|
|
@ -133,22 +117,18 @@ public class UnifiedGenotyperEngine {
|
|||
this.annotationEngine = engine;
|
||||
|
||||
N = 2 * numSamples;
|
||||
log10AlleleFrequencyPriors = new double[N+1];
|
||||
computeAlleleFrequencyPriors(N);
|
||||
genotypePriors = createGenotypePriors(UAC);
|
||||
|
||||
log10AlleleFrequencyPriorsSNPs = new double[N+1];
|
||||
log10AlleleFrequencyPriorsIndels = new double[N+1];
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
|
||||
filter.add(LOW_QUAL_FILTER_NAME);
|
||||
|
||||
try {
|
||||
referenceReader = new CachingIndexedFastaSequenceFile(toolkit.getArguments().referenceFile);
|
||||
}
|
||||
catch(FileNotFoundException ex) {
|
||||
throw new UserException.CouldNotReadInputFile(toolkit.getArguments().referenceFile,ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute full calls at a given locus.
|
||||
* Compute full calls at a given locus. Entry point for engine calls from the UnifiedGenotyper.
|
||||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param refContext the reference base
|
||||
|
|
@ -165,34 +145,46 @@ public class UnifiedGenotyperEngine {
|
|||
&& UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES)
|
||||
return null;
|
||||
*/
|
||||
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext);
|
||||
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( rawContext );
|
||||
if( model == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||
if ( stratifiedContexts == null )
|
||||
return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ? null : new VariantCallContext(generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext), refContext.getBase(), false));
|
||||
|
||||
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null);
|
||||
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
||||
|
||||
if ( vc == null )
|
||||
return null;
|
||||
|
||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc);
|
||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute GLs at a given locus.
|
||||
* Compute GLs at a given locus. Entry point for engine calls from UGCalcLikelihoods.
|
||||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param refContext the reference base
|
||||
* @param rawContext contextual information around the locus
|
||||
* @param alternateAlleleToUse the alternate allele to use, null if not set
|
||||
* @param useBAQedPileup should we use the BAQed pileup?
|
||||
* @return the VariantContext object
|
||||
*/
|
||||
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Allele alternateAlleleToUse) {
|
||||
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext);
|
||||
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Allele alternateAlleleToUse, boolean useBAQedPileup) {
|
||||
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( rawContext );
|
||||
if( model == null ) {
|
||||
return null;
|
||||
}
|
||||
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||
if ( stratifiedContexts == null )
|
||||
return null;
|
||||
return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, alternateAlleleToUse);
|
||||
return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, alternateAlleleToUse, useBAQedPileup, model);
|
||||
}
|
||||
|
||||
private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map<String, AlignmentContext> stratifiedContexts, AlignmentContextUtils.ReadOrientation type, Allele alternateAlleleToUse) {
|
||||
// private method called by both UnifiedGenotyper and UGCalcLikelihoods entry points into the engine
|
||||
private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map<String, AlignmentContext> stratifiedContexts, AlignmentContextUtils.ReadOrientation type, Allele alternateAlleleToUse, boolean useBAQedPileup, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
// initialize the data for this thread if that hasn't been done yet
|
||||
if ( glcm.get() == null ) {
|
||||
|
|
@ -201,7 +193,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
Map<String, BiallelicGenotypeLikelihoods> GLs = new HashMap<String, BiallelicGenotypeLikelihoods>();
|
||||
|
||||
Allele refAllele = glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, type, genotypePriors, GLs, alternateAlleleToUse);
|
||||
Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup);
|
||||
|
||||
if ( refAllele != null )
|
||||
return createVariantContextFromLikelihoods(refContext, refAllele, GLs);
|
||||
|
|
@ -223,7 +215,7 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
if ( annotationEngine != null ) {
|
||||
// we want to use the *unfiltered* context for the annotations
|
||||
// we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
ReadBackedPileup pileup = null;
|
||||
if (rawContext.hasExtendedEventPileup())
|
||||
pileup = rawContext.getExtendedEventPileup();
|
||||
|
|
@ -278,7 +270,7 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
/**
|
||||
* Compute genotypes at a given locus.
|
||||
* Compute genotypes at a given locus. Entry point for engine calls from UGCallVariants.
|
||||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param refContext the reference base
|
||||
|
|
@ -287,11 +279,16 @@ public class UnifiedGenotyperEngine {
|
|||
* @return the VariantCallContext object
|
||||
*/
|
||||
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) {
|
||||
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext);
|
||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc);
|
||||
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( rawContext );
|
||||
if( model == null ) {
|
||||
return null;
|
||||
}
|
||||
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model);
|
||||
}
|
||||
|
||||
private VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
// private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine
|
||||
private VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
// initialize the data for this thread if that hasn't been done yet
|
||||
if ( afcm.get() == null ) {
|
||||
|
|
@ -302,12 +299,12 @@ public class UnifiedGenotyperEngine {
|
|||
// estimate our confidence in a reference call and return
|
||||
if ( vc.getNSamples() == 0 )
|
||||
return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ?
|
||||
estimateReferenceConfidence(vc, stratifiedContexts, genotypePriors.getHeterozygosity(), false, 1.0) :
|
||||
estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), false, 1.0) :
|
||||
new VariantCallContext(generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext), refContext.getBase(), false));
|
||||
|
||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, vc.getGenotypes(), log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, vc.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
|
||||
// find the most likely frequency
|
||||
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
|
||||
|
|
@ -341,7 +338,7 @@ public class UnifiedGenotyperEngine {
|
|||
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
|
||||
// technically, at this point our confidence in a reference call isn't accurately estimated
|
||||
// because it didn't take into account samples with no data, so let's get a better estimate
|
||||
return estimateReferenceConfidence(vc, stratifiedContexts, genotypePriors.getHeterozygosity(), true, 1.0 - PofF);
|
||||
return estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF);
|
||||
}
|
||||
|
||||
// create the genotypes
|
||||
|
|
@ -349,7 +346,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// print out stats if we have a writer
|
||||
if ( verboseWriter != null )
|
||||
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors);
|
||||
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model);
|
||||
|
||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
|
@ -363,23 +360,26 @@ public class UnifiedGenotyperEngine {
|
|||
final boolean DEBUG_SLOD = false;
|
||||
|
||||
// the overall lod
|
||||
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, vcOverall.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
// the forward lod
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0));
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0));
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||
|
|
@ -395,7 +395,7 @@ public class UnifiedGenotyperEngine {
|
|||
strandScore *= 10.0;
|
||||
//logger.debug(String.format("SLOD=%f", strandScore));
|
||||
|
||||
attributes.put("SB", Double.valueOf(strandScore));
|
||||
attributes.put("SB", strandScore);
|
||||
}
|
||||
|
||||
GenomeLoc loc = refContext.getLocus();
|
||||
|
|
@ -412,7 +412,7 @@ public class UnifiedGenotyperEngine {
|
|||
myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes);
|
||||
|
||||
if ( annotationEngine != null ) {
|
||||
// first off, we want to use the *unfiltered* context for the annotations
|
||||
// first off, we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
ReadBackedPileup pileup = null;
|
||||
if (rawContext.hasExtendedEventPileup())
|
||||
pileup = rawContext.getExtendedEventPileup();
|
||||
|
|
@ -447,16 +447,11 @@ public class UnifiedGenotyperEngine {
|
|||
return endLoc;
|
||||
}
|
||||
|
||||
private static boolean isValidDeletionFraction(double d) {
|
||||
return ( d >= 0.0 && d <= 1.0 );
|
||||
}
|
||||
|
||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||
BadBaseFilter badReadPileupFilter = new BadBaseFilter(refContext, UAC);
|
||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
Map<String, AlignmentContext> stratifiedContexts = null;
|
||||
|
||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.DINDEL && rawContext.hasExtendedEventPileup() ) {
|
||||
if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) {
|
||||
|
||||
ReadBackedExtendedEventPileup rawPileup = rawContext.getExtendedEventPileup();
|
||||
|
||||
|
|
@ -470,18 +465,21 @@ public class UnifiedGenotyperEngine {
|
|||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
||||
|
||||
} else if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP && !rawContext.hasExtendedEventPileup() ) {
|
||||
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
|
||||
|
||||
byte ref = refContext.getBase();
|
||||
if ( !BaseUtils.isRegularBase(ref) )
|
||||
if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
|
||||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE);
|
||||
|
||||
// filter the reads (and test for bad pileups)
|
||||
if ( !filterPileup(stratifiedContexts, badReadPileupFilter) )
|
||||
int numDeletions = 0;
|
||||
for( final PileupElement p : rawContext.getBasePileup() ) {
|
||||
if( p.isDeletion() ) { numDeletions++; }
|
||||
}
|
||||
if( ((double) numDeletions) / ((double) rawContext.getBasePileup().size()) > UAC.MAX_DELETION_FRACTION ) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
return stratifiedContexts;
|
||||
|
|
@ -509,7 +507,6 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
if (isCovered) {
|
||||
AlignmentContext context = contexts.get(sample);
|
||||
|
||||
if (context.hasBasePileup())
|
||||
depth = context.getBasePileup().size();
|
||||
else if (context.hasExtendedEventPileup())
|
||||
|
|
@ -522,7 +519,7 @@ public class UnifiedGenotyperEngine {
|
|||
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
|
||||
}
|
||||
|
||||
protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, double[] normalizedPosteriors) {
|
||||
protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, double[] normalizedPosteriors, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
Allele refAllele = null, altAllele = null;
|
||||
for ( Allele allele : vc.getAlleles() ) {
|
||||
if ( allele.isReference() )
|
||||
|
|
@ -544,7 +541,7 @@ public class UnifiedGenotyperEngine {
|
|||
AFline.append("\t");
|
||||
AFline.append(i + "/" + N + "\t");
|
||||
AFline.append(String.format("%.2f\t", ((float)i)/N));
|
||||
AFline.append(String.format("%.8f\t", log10AlleleFrequencyPriors[i]));
|
||||
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
|
||||
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
|
||||
AFline.append("0.00000000\t");
|
||||
else
|
||||
|
|
@ -558,92 +555,6 @@ public class UnifiedGenotyperEngine {
|
|||
verboseWriter.println();
|
||||
}
|
||||
|
||||
private boolean filterPileup(Map<String, AlignmentContext> stratifiedContexts, BadBaseFilter badBaseFilter) {
|
||||
int numDeletions = 0, pileupSize = 0;
|
||||
|
||||
for ( AlignmentContext context : stratifiedContexts.values() ) {
|
||||
ReadBackedPileup pileup = context.getBasePileup();
|
||||
for ( PileupElement p : pileup ) {
|
||||
final SAMRecord read = p.getRead();
|
||||
|
||||
if ( p.isDeletion() ) {
|
||||
// if it's a good read, count it
|
||||
if ( read.getMappingQuality() >= UAC.MIN_MAPPING_QUALTY_SCORE &&
|
||||
(UAC.USE_BADLY_MATED_READS || !BadMateFilter.hasBadMate(read)) )
|
||||
numDeletions++;
|
||||
} else {
|
||||
if ( !(read instanceof GATKSAMRecord) )
|
||||
throw new ReviewedStingException("The UnifiedGenotyper currently expects GATKSAMRecords, but instead saw a " + read.getClass());
|
||||
GATKSAMRecord GATKrecord = (GATKSAMRecord)read;
|
||||
GATKrecord.setGoodBases(badBaseFilter, true);
|
||||
if ( GATKrecord.isGoodBase(p.getOffset()) )
|
||||
pileupSize++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// now, test for bad pileups
|
||||
|
||||
// in all_bases mode, it doesn't matter
|
||||
if ( UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES )
|
||||
return true;
|
||||
|
||||
// is there no coverage?
|
||||
if ( pileupSize == 0 )
|
||||
return false;
|
||||
|
||||
// are there too many deletions in the pileup?
|
||||
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
|
||||
(double)numDeletions / (double)(pileupSize + numDeletions) > UAC.MAX_DELETION_FRACTION )
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filters low quality bases out of the SAMRecords.
|
||||
*/
|
||||
private class BadBaseFilter implements GATKSAMRecordFilter {
|
||||
private ReferenceContext refContext;
|
||||
private final UnifiedArgumentCollection UAC;
|
||||
|
||||
public BadBaseFilter(ReferenceContext refContext, UnifiedArgumentCollection UAC) {
|
||||
this.refContext = refContext;
|
||||
this.UAC = UAC;
|
||||
}
|
||||
|
||||
public BitSet getGoodBases(final GATKSAMRecord record) {
|
||||
// all bits are set to false by default
|
||||
BitSet bitset = new BitSet(record.getReadLength());
|
||||
|
||||
// if the mapping quality is too low or the mate is bad, we can just zero out the whole read and continue
|
||||
if ( record.getMappingQuality() < UAC.MIN_MAPPING_QUALTY_SCORE ||
|
||||
(!UAC.USE_BADLY_MATED_READS && BadMateFilter.hasBadMate(record)) ) {
|
||||
return bitset;
|
||||
}
|
||||
|
||||
byte[] quals = record.getBaseQualities();
|
||||
for (int i = 0; i < quals.length; i++) {
|
||||
if ( quals[i] >= UAC.MIN_BASE_QUALTY_SCORE )
|
||||
bitset.set(i);
|
||||
}
|
||||
|
||||
// if a read is too long for the reference context, extend the context (being sure not to extend past the end of the chromosome)
|
||||
if ( record.getAlignmentEnd() > refContext.getWindow().getStop() ) {
|
||||
GenomeLoc window = refContext.getGenomeLocParser().createGenomeLoc(refContext.getLocus().getContig(), refContext.getWindow().getStart(), Math.min(record.getAlignmentEnd(), referenceReader.getSequenceDictionary().getSequence(refContext.getLocus().getContig()).getSequenceLength()));
|
||||
byte[] bases = referenceReader.getSubsequenceAt(window.getContig(), window.getStart(), window.getStop()).getBases();
|
||||
StringUtil.toUpperCase(bases);
|
||||
refContext = new ReferenceContext(refContext.getGenomeLocParser(),refContext.getLocus(), window, bases);
|
||||
}
|
||||
|
||||
BitSet mismatches = AlignmentUtils.mismatchesInRefWindow(record, refContext, UAC.MAX_MISMATCHES, MISMATCH_WINDOW_SIZE);
|
||||
if ( mismatches != null )
|
||||
bitset.and(mismatches);
|
||||
|
||||
return bitset;
|
||||
}
|
||||
}
|
||||
|
||||
protected boolean passesEmitThreshold(double conf, int bestAFguess) {
|
||||
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
|
||||
}
|
||||
|
|
@ -657,55 +568,77 @@ public class UnifiedGenotyperEngine {
|
|||
(UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && QualityUtils.phredScaleErrorRate(PofF) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING);
|
||||
}
|
||||
|
||||
protected void computeAlleleFrequencyPriors(int N) {
|
||||
// decide whether we are currently processing SNPs, indels, or neither
|
||||
private GenotypeLikelihoodsCalculationModel.Model getCurrentGLModel( final AlignmentContext rawContext ) {
|
||||
if( (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) && rawContext.hasExtendedEventPileup() ) {
|
||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
} else if( (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP) && !rawContext.hasExtendedEventPileup() ) {
|
||||
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
protected void computeAlleleFrequencyPriors(int N, final double[] priors, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
// calculate the allele frequency priors for 1-N
|
||||
double sum = 0.0;
|
||||
double heterozygosity;
|
||||
|
||||
if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.DINDEL)
|
||||
if (model == GenotypeLikelihoodsCalculationModel.Model.INDEL)
|
||||
heterozygosity = UAC.INDEL_HETEROZYGOSITY;
|
||||
else
|
||||
heterozygosity = UAC.heterozygosity;
|
||||
|
||||
for (int i = 1; i <= N; i++) {
|
||||
double value = heterozygosity / (double)i;
|
||||
log10AlleleFrequencyPriors[i] = Math.log10(value);
|
||||
priors[i] = Math.log10(value);
|
||||
sum += value;
|
||||
}
|
||||
|
||||
// null frequency for AF=0 is (1 - sum(all other frequencies))
|
||||
log10AlleleFrequencyPriors[0] = Math.log10(1.0 - sum);
|
||||
priors[0] = Math.log10(1.0 - sum);
|
||||
}
|
||||
|
||||
private static GenotypePriors createGenotypePriors(UnifiedArgumentCollection UAC) {
|
||||
protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
switch( model ) {
|
||||
case SNP:
|
||||
return log10AlleleFrequencyPriorsSNPs;
|
||||
case INDEL:
|
||||
return log10AlleleFrequencyPriorsIndels;
|
||||
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
}
|
||||
|
||||
private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
GenotypePriors priors;
|
||||
switch ( UAC.GLmodel ) {
|
||||
switch ( model ) {
|
||||
case SNP:
|
||||
// use flat priors for GLs
|
||||
priors = new DiploidSNPGenotypePriors();
|
||||
break;
|
||||
case DINDEL:
|
||||
case INDEL:
|
||||
// create flat priors for Indels, actual priors will depend on event length to be genotyped
|
||||
priors = new DiploidIndelGenotypePriors();
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + UAC.GLmodel);
|
||||
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
|
||||
return priors;
|
||||
}
|
||||
|
||||
private static GenotypeLikelihoodsCalculationModel getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
|
||||
GenotypeLikelihoodsCalculationModel glcm;
|
||||
switch ( UAC.GLmodel ) {
|
||||
protected GenotypePriors getGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
switch( model ) {
|
||||
case SNP:
|
||||
glcm = new SNPGenotypeLikelihoodsCalculationModel(UAC, logger);
|
||||
break;
|
||||
case DINDEL:
|
||||
glcm = new DindelGenotypeLikelihoodsCalculationModel(UAC, logger);
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + UAC.GLmodel);
|
||||
return genotypePriorsSNPs;
|
||||
case INDEL:
|
||||
return genotypePriorsIndels;
|
||||
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
|
||||
Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>();
|
||||
glcm.put(GenotypeLikelihoodsCalculationModel.Model.SNP, new SNPGenotypeLikelihoodsCalculationModel(UAC, logger));
|
||||
glcm.put(GenotypeLikelihoodsCalculationModel.Model.INDEL, new DindelGenotypeLikelihoodsCalculationModel(UAC, logger));
|
||||
return glcm;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
|
@ -423,7 +424,9 @@ public class HaplotypeIndelErrorModel {
|
|||
double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()];
|
||||
int i=0;
|
||||
for (SAMRecord read : pileup.getReads()) {
|
||||
|
||||
if(ReadUtils.is454Read(read)) {
|
||||
continue;
|
||||
}
|
||||
// for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
|
||||
// = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent
|
||||
for (int j=0; j < haplotypesInVC.size(); j++) {
|
||||
|
|
|
|||
|
|
@ -623,6 +623,9 @@ public class PairHMMIndelErrorModel {
|
|||
}
|
||||
}
|
||||
for (SAMRecord read : pileup.getReads()) {
|
||||
if(ReadUtils.is454Read(read)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
|
||||
// = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent
|
||||
|
|
|
|||
|
|
@ -25,8 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.MutableVariantContext;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
|
|
@ -161,7 +159,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
snpEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
|
||||
|
||||
// Adding the INDEL calling arguments for UG
|
||||
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.DINDEL;
|
||||
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
indelEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
|
||||
|
||||
// make sure we have callConf set to the threshold set by the UAC so we can use it later.
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
|
|||
for ( PileupElement p : context.getBasePileup() ) {
|
||||
byte base = p.getBase();
|
||||
if (!ReadsToDiscard.contains(p.getRead().getReadName()) && BaseUtils.simpleBaseToBaseIndex(base) != -1) {
|
||||
G.add(p, true, false);
|
||||
G.add(p, true, false, 0);
|
||||
//if (DEBUG){
|
||||
if (base == 'A'){numAs++;}
|
||||
else if (base == 'C'){numCs++;}
|
||||
|
|
|
|||
|
|
@ -369,7 +369,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
}
|
||||
}else{
|
||||
//consider base in likelihood calculations if it looks good and has high mapping score
|
||||
G.add(p, true, false);
|
||||
G.add(p, true, false, 0);
|
||||
if (!AllReadNames.contains(readname)){AllReadNames.add(readname); AllReads.add(p.getRead());}
|
||||
if (base == 'A'){numAs++; depth++;}
|
||||
else if (base == 'C'){numCs++; depth++;}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.variantrecalibration;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
|
|
@ -78,6 +77,8 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
private double TS_FILTER_LEVEL = 99.0;
|
||||
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the optimizer will use variants even if the specified filter name is marked in the input VCF file", required=false)
|
||||
private String[] IGNORE_INPUT_FILTERS = null;
|
||||
@Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously.", required = false)
|
||||
public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
|
|
@ -169,35 +170,39 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
|
||||
for( VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), true, false) ) {
|
||||
if( vc != null ) {
|
||||
String filterString = null;
|
||||
final Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
||||
final Double lod = (Double) lodMap.get( ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop() );
|
||||
if( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) {
|
||||
attrs.put(ContrastiveRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod));
|
||||
for( int i = tranches.size() - 1; i >= 0; i-- ) {
|
||||
final Tranche tranche = tranches.get(i);
|
||||
if( lod >= tranche.minVQSLod ) {
|
||||
if( i == tranches.size() - 1 ) {
|
||||
filterString = VCFConstants.PASSES_FILTERS_v4;
|
||||
} else {
|
||||
filterString = tranche.name;
|
||||
if( ContrastiveRecalibrator.checkRecalibrationMode( vc, MODE ) ) {
|
||||
String filterString = null;
|
||||
final Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
||||
final Double lod = (Double) lodMap.get( ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop() );
|
||||
if( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) {
|
||||
attrs.put(ContrastiveRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod));
|
||||
for( int i = tranches.size() - 1; i >= 0; i-- ) {
|
||||
final Tranche tranche = tranches.get(i);
|
||||
if( lod >= tranche.minVQSLod ) {
|
||||
if( i == tranches.size() - 1 ) {
|
||||
filterString = VCFConstants.PASSES_FILTERS_v4;
|
||||
} else {
|
||||
filterString = tranche.name;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if( filterString == null ) {
|
||||
filterString = tranches.get(0).name+"+";
|
||||
}
|
||||
|
||||
if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
|
||||
final Set<String> filters = new HashSet<String>();
|
||||
filters.add(filterString);
|
||||
vc = VariantContext.modifyFilters(vc, filters);
|
||||
}
|
||||
}
|
||||
|
||||
if( filterString == null ) {
|
||||
filterString = tranches.get(0).name+"+";
|
||||
}
|
||||
|
||||
if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
|
||||
final Set<String> filters = new HashSet<String>();
|
||||
filters.add(filterString);
|
||||
vc = VariantContext.modifyFilters(vc, filters);
|
||||
}
|
||||
vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs), ref.getBase() );
|
||||
} else { // valid VC but not compatible with this mode, so just emit the variant untouched
|
||||
vcfWriter.add( vc, ref.getBase() );
|
||||
}
|
||||
|
||||
vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs), ref.getBase() );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -83,6 +83,10 @@ public class ContrastiveRecalibrator extends RodWalker<ExpandingArrayList<Varian
|
|||
@Hidden
|
||||
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
|
||||
protected Boolean TRUST_ALL_POLYMORPHIC = false;
|
||||
@Hidden
|
||||
@Argument(fullName = "fixOmni", shortName = "fixOmni", doc = "Ignore the NOT_POLY_IN_1000G filter for the omni file because it is broken.", required = false)
|
||||
protected Boolean FIX_OMNI = false; //BUGBUG: remove me very soon!
|
||||
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
|
|
@ -144,21 +148,30 @@ public class ContrastiveRecalibrator extends RodWalker<ExpandingArrayList<Varian
|
|||
|
||||
for( final VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), true, false) ) {
|
||||
if( vc != null && ( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) ) {
|
||||
final VariantDatum datum = new VariantDatum();
|
||||
datum.annotations = dataManager.decodeAnnotations( ref.getGenomeLocParser(), vc, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine
|
||||
datum.pos = context.getLocation();
|
||||
datum.originalQual = vc.getPhredScaledQual();
|
||||
datum.isTransition = vc.isSNP() && vc.isBiallelic() && VariantContextUtils.isTransition(vc);
|
||||
dataManager.parseTrainingSets( tracker, ref, context, vc, datum, TRUST_ALL_POLYMORPHIC );
|
||||
final double priorFactor = QualityUtils.qualToProb( datum.prior );
|
||||
datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );
|
||||
mapList.add( datum );
|
||||
if( checkRecalibrationMode( vc, VRAC.MODE ) ) {
|
||||
final VariantDatum datum = new VariantDatum();
|
||||
datum.annotations = dataManager.decodeAnnotations( ref.getGenomeLocParser(), vc, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine
|
||||
datum.pos = context.getLocation();
|
||||
datum.originalQual = vc.getPhredScaledQual();
|
||||
datum.isSNP = vc.isSNP() && vc.isBiallelic();
|
||||
datum.isTransition = datum.isSNP && VariantContextUtils.isTransition(vc);
|
||||
dataManager.parseTrainingSets( tracker, ref, context, vc, datum, TRUST_ALL_POLYMORPHIC, FIX_OMNI );
|
||||
final double priorFactor = QualityUtils.qualToProb( datum.prior );
|
||||
datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );
|
||||
mapList.add( datum );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mapList;
|
||||
}
|
||||
|
||||
public static boolean checkRecalibrationMode( final VariantContext vc, final VariantRecalibratorArgumentCollection.Mode mode ) {
|
||||
return mode == VariantRecalibratorArgumentCollection.Mode.BOTH ||
|
||||
(mode == VariantRecalibratorArgumentCollection.Mode.SNP && vc.isSNP()) ||
|
||||
(mode == VariantRecalibratorArgumentCollection.Mode.INDEL && vc.isIndel());
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// reduce
|
||||
|
|
|
|||
|
|
@ -81,7 +81,6 @@ public class TrancheManager {
|
|||
|
||||
public static class TruthSensitivityMetric extends SelectionMetric {
|
||||
double[] runningSensitivity;
|
||||
double targetTiTv = 0;
|
||||
int nTrueSites = 0;
|
||||
|
||||
public TruthSensitivityMetric(int nTrueSites) {
|
||||
|
|
@ -181,16 +180,19 @@ public class TrancheManager {
|
|||
int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;
|
||||
|
||||
double minLod = data.get(minI).lod;
|
||||
for ( VariantDatum datum : data ) {
|
||||
for ( final VariantDatum datum : data ) {
|
||||
if ( datum.lod >= minLod ) {
|
||||
//if( ! datum.isKnown ) System.out.println(datum.pos);
|
||||
if ( datum.isKnown ) {
|
||||
numKnown++;
|
||||
if ( datum.isTransition ) { knownTi++; } else { knownTv++; }
|
||||
if( datum.isSNP ) {
|
||||
if ( datum.isTransition ) { knownTi++; } else { knownTv++; }
|
||||
}
|
||||
} else {
|
||||
numNovel++;
|
||||
if ( datum.isTransition ) { novelTi++; } else { novelTv++; }
|
||||
|
||||
if( datum.isSNP ) {
|
||||
if ( datum.isTransition ) { novelTi++; } else { novelTv++; }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -213,5 +215,4 @@ public class TrancheManager {
|
|||
for ( VariantDatum d : data ) { n += (d.atTruthSite && d.lod >= minLOD ? 1 : 0); }
|
||||
return n;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variantrecalibration;
|
||||
|
||||
import cern.jet.random.Normal;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
|
@ -7,6 +8,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
|
|
@ -108,7 +110,7 @@ public class VariantDataManager {
|
|||
Collections.sort( data );
|
||||
final ExpandingArrayList<VariantDatum> trainingData = new ExpandingArrayList<VariantDatum>();
|
||||
trainingData.addAll( data.subList(0, Math.round((float)bottomPercentage * data.size())) );
|
||||
logger.info( "Training with worst " + bottomPercentage * 100.0f + "% of data --> " + trainingData.size() + " variants with LOD <= " + String.format("%.4f", data.get(Math.round((float)bottomPercentage * data.size())).lod) + "." );
|
||||
logger.info( "Training with worst " + (float)bottomPercentage * 100.0f + "% of data --> " + trainingData.size() + " variants with LOD <= " + String.format("%.4f", data.get(Math.round((float)bottomPercentage * data.size())).lod) + "." );
|
||||
return trainingData;
|
||||
}
|
||||
|
||||
|
|
@ -161,6 +163,11 @@ public class VariantDataManager {
|
|||
} else {
|
||||
try {
|
||||
value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
|
||||
if(Double.isNaN(value)) { throw new NumberFormatException(); }
|
||||
if( annotationKey.toLowerCase().contains("ranksum") ) { //BUGBUG: temporary hack
|
||||
if(MathUtils.compareDoubles(value, 0.0, 0.01) == 0) { value = Normal.staticNextDouble(2.0, 2.0); }
|
||||
else if(MathUtils.compareDoubles(value, 200.0, 0.01) == 0) { value = Normal.staticNextDouble(162.0, 20.0); }
|
||||
}
|
||||
} catch( final Exception e ) {
|
||||
throw new UserException.MalformedFile( vc.getSource(), "No double value detected for annotation = " + annotationKey + " in variant at " + VariantContextUtils.getLocation(genomeLocParser,vc) + ", reported annotation value = " + vc.getAttribute( annotationKey ), e );
|
||||
}
|
||||
|
|
@ -168,19 +175,19 @@ public class VariantDataManager {
|
|||
return value;
|
||||
}
|
||||
|
||||
public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC ) {
|
||||
public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC, final boolean FIX_OMNI ) {
|
||||
datum.isKnown = false;
|
||||
datum.atTruthSite = false;
|
||||
datum.atTrainingSite = false;
|
||||
datum.prior = 3.0;
|
||||
datum.prior = 2.0;
|
||||
for( final TrainingSet trainingSet : trainingSets ) {
|
||||
final Collection<VariantContext> vcs = tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, true );
|
||||
final VariantContext trainVC = ( vcs.size() != 0 ? vcs.iterator().next() : null );
|
||||
if( trainVC != null && trainVC.isVariant() && trainVC.isNotFiltered() && ((evalVC.isSNP() && trainVC.isSNP()) || (evalVC.isIndel() && trainVC.isIndel())) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
|
||||
datum.isKnown = datum.isKnown || trainingSet.isKnown;
|
||||
datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
|
||||
datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;
|
||||
datum.prior = Math.max( datum.prior, trainingSet.prior );
|
||||
for( final VariantContext trainVC : tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, false ) ) {
|
||||
if( trainVC != null && trainVC.isVariant() && (trainVC.isNotFiltered() || (FIX_OMNI && trainVC.getFilters().size()==1 && trainVC.getFilters().contains("NOT_POLY_IN_1000G"))) && ((evalVC.isSNP() && trainVC.isSNP()) || (evalVC.isIndel() && trainVC.isIndel())) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
|
||||
datum.isKnown = datum.isKnown || trainingSet.isKnown;
|
||||
datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
|
||||
datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;
|
||||
datum.prior = Math.max( datum.prior, trainingSet.prior );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ public class VariantDatum implements Comparable<VariantDatum> {
|
|||
public boolean atTruthSite;
|
||||
public boolean atTrainingSite;
|
||||
public boolean isTransition;
|
||||
public boolean isSNP;
|
||||
public double originalQual;
|
||||
public double prior;
|
||||
public GenomeLoc pos;
|
||||
|
|
|
|||
|
|
@ -10,6 +10,14 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
|
||||
public class VariantRecalibratorArgumentCollection {
|
||||
|
||||
public enum Mode {
|
||||
SNP,
|
||||
INDEL,
|
||||
BOTH
|
||||
}
|
||||
|
||||
@Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously.", required = false)
|
||||
public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP;
|
||||
@Argument(fullName="maxGaussians", shortName="mG", doc="The maximum number of Gaussians to try during variational Bayes algorithm", required=false)
|
||||
public int MAX_GAUSSIANS = 32;
|
||||
@Argument(fullName="maxIterations", shortName="mI", doc="The maximum number of VBEM iterations to be performed in variational Bayes algorithm. Procedure will normally end when convergence is detected.", required=false)
|
||||
|
|
|
|||
|
|
@ -1,12 +1,10 @@
|
|||
package org.broadinstitute.sting.utils.baq;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -41,7 +39,7 @@ public class BAQ {
|
|||
private final static boolean DEBUG = false;
|
||||
|
||||
public enum CalculationMode {
|
||||
OFF, // don't apply a BAQ at all, the default
|
||||
OFF, // don't apply BAQ at all, the default
|
||||
CALCULATE_AS_NECESSARY, // do HMM BAQ calculation on the fly, as necessary, if there's no tag
|
||||
RECALCULATE // do HMM BAQ calculation on the fly, regardless of whether there's a tag present
|
||||
}
|
||||
|
|
@ -81,7 +79,7 @@ public class BAQ {
|
|||
*/
|
||||
private double convertFromPhredScale(double x) { return (Math.pow(10,(-x)/10.));}
|
||||
|
||||
public double cd = -1; // gap open probility [1e-3]
|
||||
public double cd = -1; // gap open probability [1e-3]
|
||||
private double ce = 0.1; // gap extension probability [0.1]
|
||||
private int cb = 7; // band width [7]
|
||||
private boolean includeClippedBases = false;
|
||||
|
|
@ -430,7 +428,7 @@ public class BAQ {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns a new qual array for read that includes the BAQ adjusted. Does not support on-the-fly BAQ calculation
|
||||
* Returns a new qual array for read that includes the BAQ adjustment. Does not support on-the-fly BAQ calculation
|
||||
*
|
||||
* @param read the SAMRecord to operate on
|
||||
* @param overwriteOriginalQuals If true, we replace the original qualities scores in the read with their BAQ'd version
|
||||
|
|
@ -461,6 +459,35 @@ public class BAQ {
|
|||
return newQuals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the BAQ adjusted quality score for this read at this offset. Does not support on-the-fly BAQ calculation
|
||||
*
|
||||
* @param read the SAMRecord to operate on
|
||||
* @param offset the offset of operate on
|
||||
* @param useRawQualsIfNoBAQTag If useRawQualsIfNoBAQTag is true, then if there's no BAQ annotation we just use the raw quality scores. Throws IllegalStateException is false and no BAQ tag is present
|
||||
* @return
|
||||
*/
|
||||
public static byte calcBAQFromTag(SAMRecord read, int offset, boolean useRawQualsIfNoBAQTag) {
|
||||
byte rawQual = read.getBaseQualities()[offset];
|
||||
byte newQual = rawQual;
|
||||
byte[] baq = getBAQTag(read);
|
||||
|
||||
if ( baq != null ) {
|
||||
// Offset to base alignment quality (BAQ), of the same length as the read sequence.
|
||||
// At the i-th read base, BAQi = Qi - (BQi - 64) where Qi is the i-th base quality.
|
||||
int baq_delta = (int)baq[offset] - 64;
|
||||
int newval = rawQual - baq_delta;
|
||||
if ( newval < 0 )
|
||||
throw new UserException.MalformedBAM(read, "BAQ tag error: the BAQ value is larger than the base quality");
|
||||
newQual = (byte)newval;
|
||||
|
||||
} else if ( ! useRawQualsIfNoBAQTag ) {
|
||||
throw new IllegalStateException("Required BAQ tag to be present, but none was on read " + read.getReadName());
|
||||
}
|
||||
|
||||
return newQual;
|
||||
}
|
||||
|
||||
public static class BAQCalculationResult {
|
||||
public byte[] refBases, rawQuals, readBases, bq;
|
||||
public int[] state;
|
||||
|
|
@ -549,7 +576,7 @@ public class BAQ {
|
|||
return new Pair<Integer, Integer>(queryStart, queryStop);
|
||||
}
|
||||
|
||||
// we need to bad ref by at least the bandwidth / 2 on either side
|
||||
// we need to pad ref by at least the bandwidth / 2 on either side
|
||||
public BAQCalculationResult calcBAQFromHMM(SAMRecord read, byte[] ref, int refOffset) {
|
||||
// todo -- need to handle the case where the cigar sum of lengths doesn't cover the whole read
|
||||
Pair<Integer, Integer> queryRange = calculateQueryRange(read);
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import java.util.Arrays;
|
|||
public class ExtendedEventPileupElement extends PileupElement {
|
||||
public enum Type {
|
||||
NOEVENT, DELETION, INSERTION
|
||||
};
|
||||
}
|
||||
|
||||
private Type type = null;
|
||||
private int eventLength = -1;
|
||||
|
|
@ -44,6 +44,7 @@ public class ExtendedEventPileupElement extends PileupElement {
|
|||
// in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases
|
||||
private SAMRecord read;
|
||||
private int offset; // position in the read immediately BEFORE the event
|
||||
// This is broken! offset is always zero because these member variables are shadowed by base class
|
||||
|
||||
/** Constructor for extended pileup element (indel).
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1,14 +1,8 @@
|
|||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
|
|
@ -18,7 +12,11 @@ import java.util.Arrays;
|
|||
*/
|
||||
public class PileupElement {
|
||||
public static final byte DELETION_BASE = BaseUtils.D;
|
||||
public static final byte DELETION_QUAL = 16;
|
||||
public static final byte DELETION_QUAL = (byte) 16;
|
||||
public static final byte A_FOLLOWED_BY_INSERTION_BASE = (byte) 87;
|
||||
public static final byte C_FOLLOWED_BY_INSERTION_BASE = (byte) 88;
|
||||
public static final byte T_FOLLOWED_BY_INSERTION_BASE = (byte) 89;
|
||||
public static final byte G_FOLLOWED_BY_INSERTION_BASE = (byte) 90;
|
||||
|
||||
protected SAMRecord read;
|
||||
protected int offset;
|
||||
|
|
@ -58,7 +56,7 @@ public class PileupElement {
|
|||
}
|
||||
|
||||
protected int getBaseIndex(final int offset) {
|
||||
return isDeletion() ? DELETION_BASE : BaseUtils.simpleBaseToBaseIndex((char)read.getReadBases()[offset]);
|
||||
return BaseUtils.simpleBaseToBaseIndex(isDeletion() ? DELETION_BASE : read.getReadBases()[offset]);
|
||||
}
|
||||
|
||||
protected byte getQual(final int offset) {
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import net.sf.samtools.SAMRecord;
|
|||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
|
@ -464,14 +465,12 @@ public class AlignmentUtils {
|
|||
|
||||
switch( ce.getOperator() ) {
|
||||
case I:
|
||||
/*
|
||||
if( alignPos > 0 ) {
|
||||
if( alignment[alignPos-1] == BaseUtils.A ) { alignment[alignPos-1] = PileupElement.A_FOLLOWED_BY_INSERTION_BASE; }
|
||||
else if( alignment[alignPos-1] == BaseUtils.C ) { alignment[alignPos-1] = PileupElement.C_FOLLOWED_BY_INSERTION_BASE; }
|
||||
else if( alignment[alignPos-1] == BaseUtils.T ) { alignment[alignPos-1] = PileupElement.T_FOLLOWED_BY_INSERTION_BASE; }
|
||||
else if( alignment[alignPos-1] == BaseUtils.G ) { alignment[alignPos-1] = PileupElement.G_FOLLOWED_BY_INSERTION_BASE; }
|
||||
}
|
||||
*/
|
||||
case S:
|
||||
for ( int jjj = 0; jjj < elementLength; jjj++ ) {
|
||||
readPos++;
|
||||
|
|
|
|||
|
|
@ -45,11 +45,6 @@ public class GATKSAMRecord extends SAMRecord {
|
|||
// These attributes exist in memory only, and are never written to disk.
|
||||
private Map<Object, Object> temporaryAttributes;
|
||||
|
||||
// A bitset which represents the bases of the read. If a bit is set, then
|
||||
// the base is good; otherwise it is a bad base (as defined by the setter).
|
||||
// TODO: this is a temporary hack. If it works, clean it up.
|
||||
private BitSet mBitSet = null;
|
||||
|
||||
public GATKSAMRecord(SAMRecord record, boolean useOriginalBaseQualities, byte defaultBaseQualities) {
|
||||
super(null); // it doesn't matter - this isn't used
|
||||
if ( record == null )
|
||||
|
|
@ -91,15 +86,6 @@ public class GATKSAMRecord extends SAMRecord {
|
|||
throw new UserException.MalformedBAM(this, String.format("Error: the number of base qualities does not match the number of bases in %s. Use -DBQ x to set a default base quality score to all reads so GATK can proceed.", mRecord.getReadName()));
|
||||
}
|
||||
|
||||
public void setGoodBases(GATKSAMRecordFilter filter, boolean abortIfAlreadySet) {
|
||||
if ( mBitSet == null || !abortIfAlreadySet )
|
||||
mBitSet = filter.getGoodBases(this);
|
||||
}
|
||||
|
||||
public boolean isGoodBase(int index) {
|
||||
return ( mBitSet == null || mBitSet.size() <= index ? true : mBitSet.get(index));
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// *** The following methods are overloaded to cache the appropriate data ***//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
|||
|
|
@ -1,38 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* A filtering interface for GATKSAMRecords.
|
||||
*
|
||||
* @author ebanks
|
||||
* @version 0.1
|
||||
*/
|
||||
public interface GATKSAMRecordFilter {
|
||||
public BitSet getGoodBases(final GATKSAMRecord record);
|
||||
}
|
||||
|
|
@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("c62ee353f7ddafa2348b461d9e107f2f"));
|
||||
Arrays.asList("b4c3bd5cf46a081fd68bea1cbb1eb006"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("0c43cfb9b08f63f49396e3bf8b3d843f"));
|
||||
Arrays.asList("81417fd6c6635873349e27a70141eab5"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("3aa1415f7bc6d41c4f14343187377f96"));
|
||||
Arrays.asList("30fe259f4cc932e592343d0b9dfd192b"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("bf3bc299552b595f791486a1f8dbc509"));
|
||||
Arrays.asList("c376f5d378c138271dba33c25d6a8592"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testOverwritingHeader() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
||||
Arrays.asList("28829de1afd9dcb41ed666f949c4f5cc"));
|
||||
Arrays.asList("2e2ef351669a07238923088347494be6"));
|
||||
executeTest("test overwriting header", spec);
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoReads() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
|
||||
Arrays.asList("04f677a7bed221037e8426c729641b65"));
|
||||
Arrays.asList("426c0f2f8b94bdb50736519769a11355"));
|
||||
executeTest("not passing it any reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -95,7 +95,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
|
||||
Arrays.asList("9180942ec0b93b93eb1c7a1b311e291b"));
|
||||
Arrays.asList("f11c43c69e6aca81d7ec98d7ac39e5f6"));
|
||||
executeTest("getting DB tag with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -103,7 +103,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithHapMap() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
|
||||
Arrays.asList("2ade14115912061c39bcbfcd788d5f6a"));
|
||||
Arrays.asList("921ff533873c37acd3849f56e41ebeec"));
|
||||
executeTest("getting DB tag with HM3", spec);
|
||||
}
|
||||
|
||||
|
|
@ -111,7 +111,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpression() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1,
|
||||
Arrays.asList("6e87613ce2be9f97581396805c6f9173"));
|
||||
Arrays.asList("8ea6f267094ab334ec0415a9177e2588"));
|
||||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,24 +15,24 @@ import java.util.Map;
|
|||
|
||||
public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||
|
||||
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER";
|
||||
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -dt NONE";
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing normal calling
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
//@Test
|
||||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("e7514b0f1f2df1ca42815b5c45775f36"));
|
||||
Arrays.asList("65636c29ce1abb3dea095d876f61e45e"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
//@Test
|
||||
public void testMultiSamplePilot2AndRecallingWithAlleles() {
|
||||
String md5 = "49ae129435063f47f0faf00337eb8bf7";
|
||||
String md5 = "0d2d2193ff5cf0a3f10292c2d0859f00";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
|
||||
|
|
@ -45,7 +45,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
executeTest("test MultiSample Pilot2 with alleles passed in", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
//@Test
|
||||
public void testWithAllelesPassedIn() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
|
|
@ -58,11 +58,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
executeTest("test MultiSample Pilot2 with alleles passed in", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
//@Test
|
||||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("8da08fe12bc0d95e548fe63681997038"));
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -glm SNP -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("ad6dfb8c341c3a40d3944a32d29e94f4"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,9 +72,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "950ebf5eaa245131dc22aef211526681";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "9cfe952c0cb3e58fb7bf12b0df6d96f4";
|
||||
|
||||
@Test
|
||||
//@Test
|
||||
public void testCompressedOutput() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
|
|
@ -97,9 +97,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
//@Test
|
||||
public void testParallelization() {
|
||||
String md5 = "73845a29994d653a4d3f01b5467a3eed";
|
||||
String md5 = "8d9965a5a4d23520a6a07272f217081e";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -123,13 +123,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
//@Test
|
||||
public void testCallingParameters() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "--min_base_quality_score 26", "5f1cfb9c7f82e6414d5db7aa344813ac" );
|
||||
e.put( "--min_mapping_quality_score 26", "6c3ad441f3a23ade292549b1dea80932" );
|
||||
e.put( "--min_mapping_quality_score 26", "3c0104c8ae70f8f8def90f317decd5ff" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "5ecaf4281410b67e8e2e164f2ea0d58a" );
|
||||
e.put( "--p_nonref_model GRID_SEARCH", "17ffb56d078fdde335a79773e9534ce7" );
|
||||
e.put( "--p_nonref_model GRID_SEARCH", "27c76efb74acb62aa13a6685c7566e6c" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -139,11 +139,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
//@Test
|
||||
public void testOutputParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-sites_only", "71e561ba6fc66bd8b84907252f71ea55" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "4ffcb1e1f20ce175783c32c30deef8db" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "fee7d5f562ecd2a1b7c7ed6ac6b80a97" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "2ca25ad91f7a746f715afdca5d516768" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
|
|
@ -154,11 +154,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
//@Test
|
||||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("17ffb56d078fdde335a79773e9534ce7"));
|
||||
Arrays.asList("27c76efb74acb62aa13a6685c7566e6c"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -172,10 +172,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// testing heterozygosity
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
//@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "7e820c33a4c688148eec57342645c9b6" );
|
||||
e.put( 0.01, "37d6e63d313206b11b99194d16aa2147" );
|
||||
e.put( 1.0 / 1850, "c1c48e0c4724b75f12936e22a8629457" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
|
|
@ -192,7 +192,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// testing calls with SLX, 454, and SOLID data
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
//@Test
|
||||
public void testMultiTechnologies() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand +
|
||||
|
|
@ -200,56 +200,75 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("5974d8c21d27d014e2d0bed695b0b42e"));
|
||||
Arrays.asList("645b9da6cba47bda8ee2142a6bb92d2c"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing calls with BAQ
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//@Test
|
||||
public void testCallingWithBAQ() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand +
|
||||
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" +
|
||||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("1fe33a09dff4624a4502e32072f34c0e"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing indel caller
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
// Basic indel testing with SLX data
|
||||
@Test
|
||||
//@Test
|
||||
public void testSimpleIndels() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand +
|
||||
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam" +
|
||||
" -o %s" +
|
||||
" -glm DINDEL" +
|
||||
" -glm INDEL" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("a595df10d47a1fa7b2e1df2cd66d7ff0"));
|
||||
Arrays.asList("d4c15c60ffefe754d83e1164e78f2b6a"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
||||
// Basic indel testing with SLX data
|
||||
@Test
|
||||
//@Test
|
||||
public void testIndelsWithLowMinAlleleCnt() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand +
|
||||
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam" +
|
||||
" -o %s" +
|
||||
" -glm DINDEL -minIndelCnt 1" +
|
||||
" -glm INDEL -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("e65e7be8bf736a39d86dbb9b51c89a8e"));
|
||||
Arrays.asList("599220ba0cc5d8a32e4952fca85fd080"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX witn low min allele count"), spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiTechnologyIndels() {
|
||||
//@Test
|
||||
public void testMultiTechnologyIndels() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand +
|
||||
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" +
|
||||
" -o %s" +
|
||||
" -glm DINDEL" +
|
||||
" -glm INDEL" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("8f853cc012d05d341eb55656a7354716"));
|
||||
Arrays.asList("4c8c2ac4e024f70779465fb14c5d8c8a"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -259,23 +278,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// todo - test fails because for some reason when including -nt we get "PASS" instead of . in filter fields
|
||||
public void testIndelParallelization() {
|
||||
String md5 = "e65e7be8bf736a39d86dbb9b51c89a8e";
|
||||
String md5 = "599220ba0cc5d8a32e4952fca85fd080";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation +
|
||||
"NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -glm DINDEL -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
"NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -glm INDEL -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test indel caller parallelization (single thread)", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation +
|
||||
"NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -glm DINDEL -o %s -L 1:10,000,000-10,100,000 -nt 2", 1,
|
||||
"NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -glm INDEL -o %s -L 1:10,000,000-10,100,000 -nt 2", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test indel caller parallelization (2 threads)", spec2);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation +
|
||||
"NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -glm DINDEL -o %s -L 1:10,000,000-10,100,000 -nt 4", 1,
|
||||
"NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -glm INDEL -o %s -L 1:10,000,000-10,100,000 -nt 4", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test indel caller parallelization (4 threads)", spec3);
|
||||
}
|
||||
|
|
@ -285,14 +304,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithIndelAllelesPassedIn() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000 -glm DINDEL", 1,
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000 -glm INDEL", 1,
|
||||
Arrays.asList("e95c545b8ae06f0721f260125cfbe1f0"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf "
|
||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000 -glm DINDEL", 1,
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000 -glm INDEL", 1,
|
||||
Arrays.asList("6c96d76b9bc3aade0c768d7c657ae210"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec2);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,14 +25,14 @@ public class VariantRecalibrationWalkersV2IntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
VRTest yriTrio = new VRTest("yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf",
|
||||
"644e37fe6edaee13ddf9f3c4f0e6f500", // tranches
|
||||
"e467bfd4ab74c7a7ff303cf810c514b3", // recal file
|
||||
"05f66c959d1f8e78bcb4298092524921"); // cut VCF
|
||||
"74127253b3e551ae602b5957eef8e37e", // tranches
|
||||
"38fe940269ce56449b2e4e3493f89cd9", // recal file
|
||||
"067197ba4212a38c7fd736c1f66b294d"); // cut VCF
|
||||
|
||||
VRTest lowPass = new VRTest("lowpass.N3.chr1.raw.vcf",
|
||||
"e0a3845619d2138717df39ef1e9ee75f", // tranches
|
||||
"c3fa524d0bdb40d6bd7aeda9e8dd882c", // recal file
|
||||
"9f21a668c3fff13f382367851a11303c"); // cut VCF
|
||||
"40242ee5ce97ffde50cc7a12cfa0d841", // tranches
|
||||
"eb8df1fbf3d71095bdb3a52713dd2a64", // recal file
|
||||
"13aa221241a974883072bfad14ad8afc"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
public Object[][] createData1() {
|
||||
|
|
@ -45,12 +45,12 @@ public class VariantRecalibrationWalkersV2IntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" -B:dbsnp,DBSNP,known=true,training=false,truth=false,prior=10.0 " + GATKDataLocation + "dbsnp_129_b36.rod" +
|
||||
" -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/HapMap/3.2/sites_r27_nr.b36_fwd.vcf" +
|
||||
" -B:omni,VCF,known=false,training=true,truth=true,prior=15.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/1212samples.b36.vcf" +
|
||||
" -B:hapmap,VCF,known=false,training=true,truth=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.2/sites_r27_nr.b36_fwd.vcf" +
|
||||
" -B:omni,VCF,known=false,training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/1212samples.b36.vcf" +
|
||||
" -T ContrastiveRecalibrator" +
|
||||
" -B:input,VCF " + params.inVCF +
|
||||
" -L 1:50,000,000-120,000,000" +
|
||||
" -an QD -an MQ -an SB -an AF" +
|
||||
" -an QD -an MQ -an SB" +
|
||||
" --trustAllPolymorphic" + // for speed
|
||||
" -recalFile %s" +
|
||||
" -tranchesFile %s",
|
||||
|
|
|
|||
Loading…
Reference in New Issue