Merge branch 'threadMonitors'
Conflicts: private/scala/qscript/org/broadinstitute/sting/queue/qscripts/performance/GATKPerformanceOverTime.scala
This commit is contained in:
commit
95a1337285
|
|
@ -53,13 +53,14 @@ public class ErrorModel {
|
|||
|
||||
PairHMMIndelErrorModel pairModel = null;
|
||||
LinkedHashMap<Allele, Haplotype> haplotypeMap = null;
|
||||
HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = null;
|
||||
double[][] perReadLikelihoods = null;
|
||||
|
||||
double[] model = new double[maxQualityScore+1];
|
||||
Arrays.fill(model,Double.NEGATIVE_INFINITY);
|
||||
|
||||
boolean hasCalledAlleles = false;
|
||||
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
if (refSampleVC != null) {
|
||||
|
||||
for (Allele allele : refSampleVC.getAlleles()) {
|
||||
|
|
@ -72,7 +73,6 @@ public class ErrorModel {
|
|||
if (refSampleVC.isIndel()) {
|
||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
|
||||
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
|
||||
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
|
||||
}
|
||||
}
|
||||
|
|
@ -92,12 +92,12 @@ public class ErrorModel {
|
|||
|
||||
Allele refAllele = refSampleVC.getReference();
|
||||
|
||||
if (refSampleVC.isIndel()) {
|
||||
if ( refSampleVC.isIndel()) {
|
||||
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
|
||||
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
|
||||
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
|
||||
if (!haplotypeMap.isEmpty())
|
||||
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
|
||||
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, perReadAlleleLikelihoodMap, readCounts);
|
||||
}
|
||||
int idx = 0;
|
||||
for (PileupElement refPileupElement : refSamplePileup) {
|
||||
|
|
|
|||
|
|
@ -41,15 +41,6 @@ import java.util.*;
|
|||
|
||||
public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
||||
|
||||
//protected Set<String> laneIDs;
|
||||
public enum Model {
|
||||
SNP,
|
||||
INDEL,
|
||||
POOLSNP,
|
||||
POOLINDEL,
|
||||
BOTH
|
||||
}
|
||||
|
||||
final protected UnifiedArgumentCollection UAC;
|
||||
|
||||
protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
|
|
@ -203,7 +194,8 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
|||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final List<Allele> allAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser) {
|
||||
final GenomeLocParser locParser,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
|
||||
HashMap<String, ErrorModel> perLaneErrorModels = getPerLaneErrorModels(tracker, ref, contexts);
|
||||
if (perLaneErrorModels == null && UAC.referenceSampleName != null)
|
||||
|
|
@ -215,8 +207,11 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
|||
newContext.put(DUMMY_SAMPLE_NAME,mergedContext);
|
||||
contexts = newContext;
|
||||
}
|
||||
|
||||
// get initial alleles to genotype
|
||||
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
||||
// starting a new site: clear allele list
|
||||
perReadAlleleLikelihoodMap.clear(); // clean mapping sample-> per read, per allele likelihoods
|
||||
}
|
||||
// get initial alleles to genotype
|
||||
final List<Allele> allAlleles = new ArrayList<Allele>();
|
||||
if (allAllelesToUse == null || allAllelesToUse.isEmpty())
|
||||
allAlleles.addAll(getInitialAllelesToUse(tracker, ref,contexts,contextType,locParser, allAllelesToUse));
|
||||
|
|
@ -234,9 +229,13 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
|||
continue;
|
||||
|
||||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||
if (!perReadAlleleLikelihoodMap.containsKey(sample.getKey())){
|
||||
// no likelihoods have been computed for this sample at this site
|
||||
perReadAlleleLikelihoodMap.put(sample.getKey(), new PerReadAlleleLikelihoodMap());
|
||||
}
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
|
||||
final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO, perReadAlleleLikelihoodMap.get(sample.getKey()));
|
||||
// actually compute likelihoods
|
||||
final int nGoodBases = GL.add(pileup, UAC);
|
||||
if ( nGoodBases > 0 )
|
||||
|
|
@ -333,7 +332,8 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
|||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
final boolean useBQAedPileup,
|
||||
final ReferenceContext ref,
|
||||
final boolean ignoreLaneInformation);
|
||||
final boolean ignoreLaneInformation,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap);
|
||||
|
||||
protected abstract List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
|||
double[][] readHaplotypeLikelihoods;
|
||||
|
||||
final byte refBase;
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap;
|
||||
|
||||
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
|
|
@ -34,7 +35,8 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
|||
final boolean ignoreLaneInformation,
|
||||
final PairHMMIndelErrorModel pairModel,
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||
final ReferenceContext referenceContext) {
|
||||
final ReferenceContext referenceContext,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
|
||||
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
|
||||
this.pairModel = pairModel;
|
||||
this.haplotypeMap = haplotypeMap;
|
||||
|
|
@ -42,6 +44,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
|||
this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles);
|
||||
// todo - not needed if indel alleles have base at current position
|
||||
this.refBase = referenceContext.getBase();
|
||||
this.perReadAlleleLikelihoodMap = perReadAlleleLikelihoodMap;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
|
@ -142,8 +145,9 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
|||
List<Integer> numSeenBases = new ArrayList<Integer>(this.alleles.size());
|
||||
|
||||
if (!hasReferenceSampleData) {
|
||||
|
||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
|
||||
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, perReadAlleleLikelihoodMap, readCounts);
|
||||
n = readHaplotypeLikelihoods.length;
|
||||
} else {
|
||||
Allele refAllele = null;
|
||||
|
|
|
|||
|
|
@ -73,8 +73,9 @@ public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends Gener
|
|||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
final boolean useBQAedPileup,
|
||||
final ReferenceContext ref,
|
||||
final boolean ignoreLaneInformation){
|
||||
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
|
||||
final boolean ignoreLaneInformation,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
|
||||
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
|
||||
|
|
@ -90,7 +91,6 @@ public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends Gener
|
|||
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
|
||||
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
|
||||
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
||||
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
|
||||
haplotypeMap.clear();
|
||||
}
|
||||
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(alleles, ref, ref.getLocus(), haplotypeMap);
|
||||
|
|
|
|||
|
|
@ -49,7 +49,8 @@ public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends General
|
|||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||
final boolean useBQAedPileup,
|
||||
final ReferenceContext ref,
|
||||
final boolean ignoreLaneInformation) {
|
||||
final boolean ignoreLaneInformation,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
|
||||
return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -42,14 +42,12 @@ import java.util.*;
|
|||
public class GenotypingEngine {
|
||||
|
||||
private final boolean DEBUG;
|
||||
private final int MNP_LOOK_AHEAD;
|
||||
private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
|
||||
private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
|
||||
private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("<UNASSEMBLED_EVENT>", false);
|
||||
|
||||
public GenotypingEngine( final boolean DEBUG, final int MNP_LOOK_AHEAD, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
|
||||
public GenotypingEngine( final boolean DEBUG, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
|
||||
this.DEBUG = DEBUG;
|
||||
this.MNP_LOOK_AHEAD = MNP_LOOK_AHEAD;
|
||||
this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
|
||||
noCall.add(Allele.NO_CALL);
|
||||
}
|
||||
|
|
@ -120,7 +118,7 @@ public class GenotypingEngine {
|
|||
System.out.println( "> Cigar = " + h.getCigar() );
|
||||
}
|
||||
// Walk along the alignment and turn any difference from the reference into an event
|
||||
h.setEventMap( generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) );
|
||||
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
|
||||
startPosKeySet.addAll(h.getEventMap().keySet());
|
||||
}
|
||||
|
||||
|
|
@ -203,7 +201,7 @@ public class GenotypingEngine {
|
|||
if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); }
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
// Walk along the alignment and turn any difference from the reference into an event
|
||||
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) );
|
||||
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
|
||||
if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
|
||||
if( DEBUG ) {
|
||||
System.out.println( h.toString() );
|
||||
|
|
@ -521,11 +519,7 @@ public class GenotypingEngine {
|
|||
return false;
|
||||
}
|
||||
|
||||
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) {
|
||||
return generateVCsFromAlignment(null, alignmentStartHapwrtRef, cigar, ref, alignment, refLoc, sourceNameToAdd, MNP_LOOK_AHEAD); // BUGBUG: needed for compatibility with HaplotypeResolver code
|
||||
}
|
||||
|
||||
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) {
|
||||
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) {
|
||||
final HashMap<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
|
||||
|
||||
int refPos = alignmentStartHapwrtRef;
|
||||
|
|
@ -543,7 +537,7 @@ public class GenotypingEngine {
|
|||
if( BaseUtils.isRegularBase(refByte) ) {
|
||||
insertionAlleles.add( Allele.create(refByte, true) );
|
||||
}
|
||||
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
||||
if( (haplotype.leftBreakPoint != 0 || haplotype.rightBreakPoint != 0) && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||
} else {
|
||||
byte[] insertionBases = new byte[]{};
|
||||
|
|
@ -590,39 +584,16 @@ public class GenotypingEngine {
|
|||
case EQ:
|
||||
case X:
|
||||
{
|
||||
int numSinceMismatch = -1;
|
||||
int stopOfMismatch = -1;
|
||||
int startOfMismatch = -1;
|
||||
int refPosStartOfMismatch = -1;
|
||||
for( int iii = 0; iii < elementLength; iii++ ) {
|
||||
if( ref[refPos] != alignment[alignmentPos] && alignment[alignmentPos] != ((byte) 'N') ) {
|
||||
// SNP or start of possible MNP
|
||||
if( stopOfMismatch == -1 ) {
|
||||
startOfMismatch = alignmentPos;
|
||||
stopOfMismatch = alignmentPos;
|
||||
numSinceMismatch = 0;
|
||||
refPosStartOfMismatch = refPos;
|
||||
} else {
|
||||
stopOfMismatch = alignmentPos;
|
||||
}
|
||||
}
|
||||
if( stopOfMismatch != -1) {
|
||||
numSinceMismatch++;
|
||||
}
|
||||
if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) {
|
||||
final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 );
|
||||
final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 );
|
||||
if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) {
|
||||
final byte refByte = ref[refPos];
|
||||
final byte altByte = alignment[alignmentPos];
|
||||
if( refByte != altByte ) { // SNP!
|
||||
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isRegularBase(altByte) ) {
|
||||
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
|
||||
snpAlleles.add( Allele.create( refBases, true ) );
|
||||
snpAlleles.add( Allele.create( mismatchBases, false ) );
|
||||
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
|
||||
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
|
||||
snpAlleles.add( Allele.create( refByte, true ) );
|
||||
snpAlleles.add( Allele.create( altByte, false ) );
|
||||
vcs.put(refLoc.getStart() + refPos, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos, refLoc.getStart() + refPos, snpAlleles).make());
|
||||
}
|
||||
numSinceMismatch = -1;
|
||||
stopOfMismatch = -1;
|
||||
startOfMismatch = -1;
|
||||
refPosStartOfMismatch = -1;
|
||||
}
|
||||
refPos++;
|
||||
alignmentPos++;
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
import com.google.java.contract.Ensures;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
|
|
@ -45,10 +46,6 @@ import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
|||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
|
|
@ -122,10 +119,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
|
||||
protected String keepRG = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="mnpLookAhead", shortName="mnpLookAhead", doc = "The number of bases to combine together to form MNPs out of nearby consecutive SNPs on the same haplotype", required = false)
|
||||
protected int MNP_LOOK_AHEAD = 0;
|
||||
|
||||
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
|
||||
protected int MIN_PRUNE_FACTOR = 1;
|
||||
|
||||
|
|
@ -138,7 +131,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Gap continuation penalty for use in the Pair HMM", required = false)
|
||||
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false)
|
||||
protected int gcpHMM = 10;
|
||||
|
||||
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
|
||||
|
|
@ -181,7 +174,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
* so annotations will be excluded even if they are explicitly included with the other options.
|
||||
*/
|
||||
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
|
||||
protected List<String> annotationsToExclude = new ArrayList<String>(Arrays.asList(new String[]{"HaplotypeScore", "MappingQualityZero", "SpanningDeletions", "TandemRepeatAnnotator"}));
|
||||
protected List<String> annotationsToExclude = new ArrayList<String>(Arrays.asList(new String[]{"SpanningDeletions", "TandemRepeatAnnotator"}));
|
||||
|
||||
/**
|
||||
* Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
|
||||
|
|
@ -192,21 +185,21 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
@ArgumentCollection
|
||||
private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection();
|
||||
|
||||
// the calculation arguments
|
||||
private UnifiedGenotyperEngine UG_engine = null;
|
||||
private UnifiedGenotyperEngine UG_engine_simple_genotyper = null;
|
||||
|
||||
@Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false)
|
||||
protected boolean DEBUG;
|
||||
|
||||
// the UG engines
|
||||
private UnifiedGenotyperEngine UG_engine = null;
|
||||
private UnifiedGenotyperEngine UG_engine_simple_genotyper = null;
|
||||
|
||||
// the assembly engine
|
||||
LocalAssemblyEngine assemblyEngine = null;
|
||||
private LocalAssemblyEngine assemblyEngine = null;
|
||||
|
||||
// the likelihoods engine
|
||||
LikelihoodCalculationEngine likelihoodCalculationEngine = null;
|
||||
private LikelihoodCalculationEngine likelihoodCalculationEngine = null;
|
||||
|
||||
// the genotyping engine
|
||||
GenotypingEngine genotypingEngine = null;
|
||||
private GenotypingEngine genotypingEngine = null;
|
||||
|
||||
// the annotation engine
|
||||
private VariantAnnotatorEngine annotationEngine;
|
||||
|
|
@ -292,7 +285,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
|
||||
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
|
||||
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false );
|
||||
genotypingEngine = new GenotypingEngine( DEBUG, MNP_LOOK_AHEAD, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
|
||||
genotypingEngine = new GenotypingEngine( DEBUG, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -418,7 +411,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
|
||||
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
|
||||
|
||||
final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
|
||||
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
|
||||
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
|
|||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
|
|
@ -337,8 +338,8 @@ public class HaplotypeResolver extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// order results by start position
|
||||
final TreeMap<Integer, VariantContext> source1Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1, 0));
|
||||
final TreeMap<Integer, VariantContext> source2Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2, 0));
|
||||
final TreeMap<Integer, VariantContext> source1Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source1Haplotype), 0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1));
|
||||
final TreeMap<Integer, VariantContext> source2Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source2Haplotype), 0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2));
|
||||
if ( source1Map.size() == 0 || source2Map.size() == 0 ) {
|
||||
// TODO -- handle errors appropriately
|
||||
logger.debug("No source alleles; aborting at " + refContext.getLocus());
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -322,11 +323,13 @@ public class LikelihoodCalculationEngine {
|
|||
return bestHaplotypes;
|
||||
}
|
||||
|
||||
public static Map<String, Map<Allele, List<GATKSAMRecord>>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
|
||||
final Map<String, Map<Allele, List<GATKSAMRecord>>> returnMap = new HashMap<String, Map<Allele, List<GATKSAMRecord>>>();
|
||||
public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
|
||||
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
|
||||
final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
|
||||
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
|
||||
final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
|
||||
//final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
|
||||
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
|
||||
final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
|
||||
for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
|
||||
final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same!
|
||||
|
|
@ -334,51 +337,31 @@ public class LikelihoodCalculationEngine {
|
|||
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
|
||||
final double likelihoods[] = new double[call.getFirst().getAlleles().size()];
|
||||
int count = 0;
|
||||
for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood
|
||||
double maxLikelihood = Double.NEGATIVE_INFINITY;
|
||||
|
||||
for( final Allele a : call.getFirst().getAlleles() ) {
|
||||
for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object)
|
||||
final double likelihood = h.getReadLikelihoods(sample.getKey())[iii];
|
||||
if( likelihood > maxLikelihood ) {
|
||||
maxLikelihood = likelihood;
|
||||
}
|
||||
}
|
||||
likelihoods[count++] = maxLikelihood;
|
||||
}
|
||||
final int bestAllele = MathUtils.maxElementIndex(likelihoods);
|
||||
final double bestLikelihood = likelihoods[bestAllele];
|
||||
Allele allele = Allele.NO_CALL;
|
||||
boolean isInformativeRead = false;
|
||||
for( final double likelihood : likelihoods ) {
|
||||
if( bestLikelihood - likelihood > BEST_LIKELIHOOD_THRESHOLD ) {
|
||||
isInformativeRead = true;
|
||||
break;
|
||||
likelihoodMap.add(read, a, likelihood);
|
||||
}
|
||||
}
|
||||
// uninformative reads get the no call Allele
|
||||
if( isInformativeRead ) {
|
||||
allele = call.getFirst().getAlleles().get(bestAllele);
|
||||
}
|
||||
List<GATKSAMRecord> readList = alleleReadMap.get(allele);
|
||||
if( readList == null ) {
|
||||
readList = new ArrayList<GATKSAMRecord>();
|
||||
alleleReadMap.put(allele, readList);
|
||||
}
|
||||
readList.add(read);
|
||||
}
|
||||
}
|
||||
// add all filtered reads to the NO_CALL list because they weren't given any likelihoods
|
||||
/* // add all filtered reads to the NO_CALL list because they weren't given any likelihoods
|
||||
List<GATKSAMRecord> readList = alleleReadMap.get(Allele.NO_CALL);
|
||||
if( readList == null ) {
|
||||
readList = new ArrayList<GATKSAMRecord>();
|
||||
alleleReadMap.put(Allele.NO_CALL, readList);
|
||||
}
|
||||
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
|
||||
*/
|
||||
/* for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
|
||||
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
|
||||
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
|
||||
readList.add(read);
|
||||
}
|
||||
}
|
||||
returnMap.put(sample.getKey(), alleleReadMap);
|
||||
*/
|
||||
returnMap.put(sample.getKey(), likelihoodMap);
|
||||
|
||||
}
|
||||
return returnMap;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -201,7 +201,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
// compute mean number of reduced read counts in current kmer span
|
||||
final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
|
||||
// precise rounding can make a difference with low consensus counts
|
||||
countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
|
||||
countNumber = MathUtils.arrayMax(counts);
|
||||
// countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
|
||||
}
|
||||
|
||||
if( !badKmer ) {
|
||||
|
|
@ -292,7 +293,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
|
||||
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
|
||||
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
|
||||
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
|
||||
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
|
||||
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
|
||||
|
|
|
|||
|
|
@ -18,8 +18,8 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
|
||||
final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
|
||||
final String REFSAMPLE_NAME = "NA12878";
|
||||
final String MTINTERVALS = "MT:1-3000";
|
||||
final String LSVINTERVALS = "20:40,000,000-41,000,000";
|
||||
final String MTINTERVALS = "MT:1-1000";
|
||||
final String LSVINTERVALS = "20:40,500,000-41,000,000";
|
||||
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
|
||||
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
|
||||
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
|
||||
|
|
@ -47,31 +47,31 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testBOTH_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0934f72865388999efec64bd9d4a9b93");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","077db83cf7dc5490f670c85856b408b2");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","126581c72d287722437274d41b6fed7b");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","e460a17377b731ff4eab36fb56042ecd");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","b543aa1c3efedb301e525c1d6c50ed8d");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9514ed15c7030b6d47e04e6a3a2b0a3e");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","55b20557a836bb92688e68f12d7f5dc4");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","26598044436c8044f22ffa767b06a0f0");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","7eb889e8e07182f4c3d64609591f9459");
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","da359fe7dd6dce045193198c264301ee");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "db8114877b99b14f7180fdcd24b040a7");
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "ad0eef3a9deaa098d79df62af7e5448a");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -142,7 +142,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
byte[] ref;
|
||||
byte[] hap;
|
||||
HashMap<Integer,Byte> expected;
|
||||
GenotypingEngine ge = new GenotypingEngine(false, 0, false);
|
||||
|
||||
public BasicGenotypingTestProvider(String refString, String hapString, HashMap<Integer, Byte> expected) {
|
||||
super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString));
|
||||
|
|
@ -153,7 +152,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
|
||||
public HashMap<Integer,VariantContext> calcAlignment() {
|
||||
final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap);
|
||||
return ge.generateVCsFromAlignment( alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name", 0);
|
||||
return GenotypingEngine.generateVCsFromAlignment( new Haplotype(hap), alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,9 +8,10 @@ import java.util.Arrays;
|
|||
public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||
final static String REF = b37KGReference;
|
||||
final String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||
final String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
||||
final String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
||||
//final String RECAL_FILE = validationDataLocation + "NA12878.kmer.8.subset.recal_data.bqsr";
|
||||
|
||||
private void HCTest(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3";
|
||||
|
|
@ -20,28 +21,49 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "6b30c7e1b6bbe80d180d9d67441cec12");
|
||||
HCTest(CEUTRIO_BAM, "", "e5b4a0627a1d69b9356f8a7cd2260e89");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "4cdfbfeadef00725974828310558d7d4");
|
||||
HCTest(NA12878_BAM, "", "202d5b6edaf74f411c170099749f202f");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "6183fb6e374976d7087150009685e043");
|
||||
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "561931ba3919808ec471e745cb3148c7");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 3";
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 2";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(CEUTRIO_BAM, "", "ab7593a7a60a2e9a66053572f1718df1");
|
||||
HCTestComplexVariants(CEUTRIO_BAM, "", "3424b398a9f47c8ac606a5c56eb7d8a7");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 2";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerSymbolicVariants: args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "b71cfaea9390136c584c9671b149d573");
|
||||
}
|
||||
|
||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10,005,000-10,025,000 --no_cmdline_in_header -o %s -minPruning 2";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerIndelQualityScores: args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "e1f88fac91424740c0eaac1de48b3970");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -372,7 +372,7 @@ public class GenomeAnalysisEngine {
|
|||
else if(argCollection.numberOfIOThreads != null)
|
||||
numIOThreads = argCollection.numberOfIOThreads;
|
||||
|
||||
this.threadAllocation = new ThreadAllocation(argCollection.numberOfThreads,numCPUThreads,numIOThreads);
|
||||
this.threadAllocation = new ThreadAllocation(argCollection.numberOfThreads, numCPUThreads, numIOThreads, ! argCollection.disableEfficiencyMonitor);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -283,6 +283,14 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
||||
public Integer numberOfThreads = 1;
|
||||
|
||||
/**
|
||||
* By default the GATK monitors its own efficiency, but this can have a itsy-bitsy tiny
|
||||
* cost (< 0.1%) in runtime because of turning on the JavaBean. This argument allows you
|
||||
* to disable the monitor
|
||||
*/
|
||||
@Argument(fullName = "disableThreadEfficiencyMonitor", shortName = "dtem", doc = "Disable GATK efficiency monitoring", required = false)
|
||||
public Boolean disableEfficiencyMonitor = false;
|
||||
|
||||
/**
|
||||
* The following two arguments (num_cpu_threads, num_io_threads are TEMPORARY since Queue cannot currently support arbitrary tagged data types.
|
||||
* TODO: Kill this when I can do a tagged integer in Queue.
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.threading.EfficiencyMonitoringThreadFactory;
|
||||
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
|
||||
|
||||
import java.util.Collection;
|
||||
|
|
@ -80,9 +81,22 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
* @param reference Reference for driving the traversal.
|
||||
* @param nThreadsToUse maximum number of threads to use to do the work
|
||||
*/
|
||||
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
|
||||
protected HierarchicalMicroScheduler(final GenomeAnalysisEngine engine,
|
||||
final Walker walker,
|
||||
final SAMDataSource reads,
|
||||
final IndexedFastaSequenceFile reference,
|
||||
final Collection<ReferenceOrderedDataSource> rods,
|
||||
final int nThreadsToUse,
|
||||
final boolean monitorThreadPerformance ) {
|
||||
super(engine, walker, reads, reference, rods);
|
||||
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
|
||||
|
||||
if ( monitorThreadPerformance ) {
|
||||
final EfficiencyMonitoringThreadFactory monitoringThreadFactory = new EfficiencyMonitoringThreadFactory(nThreadsToUse);
|
||||
setThreadEfficiencyMonitor(monitoringThreadFactory);
|
||||
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse, monitoringThreadFactory);
|
||||
} else {
|
||||
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
|
||||
}
|
||||
}
|
||||
|
||||
public Object execute( Walker walker, Iterable<Shard> shardStrategy ) {
|
||||
|
|
@ -140,6 +154,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
// do final cleanup operations
|
||||
outputTracker.close();
|
||||
cleanup();
|
||||
executionIsDone();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.io.OutputTracker;
|
|||
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
|
|
@ -33,8 +34,17 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
* @param reference Reference for driving the traversal.
|
||||
* @param rods Reference-ordered data.
|
||||
*/
|
||||
protected LinearMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods ) {
|
||||
protected LinearMicroScheduler(final GenomeAnalysisEngine engine,
|
||||
final Walker walker,
|
||||
final SAMDataSource reads,
|
||||
final IndexedFastaSequenceFile reference,
|
||||
final Collection<ReferenceOrderedDataSource> rods,
|
||||
final boolean monitorThreadPerformance ) {
|
||||
super(engine, walker, reads, reference, rods);
|
||||
|
||||
if ( monitorThreadPerformance )
|
||||
setThreadEfficiencyMonitor(new ThreadEfficiencyMonitor());
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -88,6 +98,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
|
||||
outputTracker.close();
|
||||
cleanup();
|
||||
executionIsDone();
|
||||
|
||||
return accumulator;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.traversals.*;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
|
||||
|
||||
import javax.management.JMException;
|
||||
import javax.management.MBeanServer;
|
||||
|
|
@ -79,6 +80,13 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
private final MBeanServer mBeanServer;
|
||||
private final ObjectName mBeanName;
|
||||
|
||||
/**
|
||||
* Threading efficiency monitor for tracking the resource utilization of the GATK
|
||||
*
|
||||
* may be null
|
||||
*/
|
||||
ThreadEfficiencyMonitor threadEfficiencyMonitor = null;
|
||||
|
||||
/**
|
||||
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
|
||||
* selected walker.
|
||||
|
|
@ -98,11 +106,11 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
if(walker instanceof ReadWalker)
|
||||
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s is a read walker. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
|
||||
logger.info(String.format("Running the GATK in parallel mode with %d concurrent threads",threadAllocation.getNumCPUThreads()));
|
||||
return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads());
|
||||
return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
|
||||
} else {
|
||||
if(threadAllocation.getNumCPUThreads() > 1)
|
||||
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
|
||||
return new LinearMicroScheduler(engine, walker, reads, reference, rods);
|
||||
return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.monitorThreadEfficiency());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -150,6 +158,16 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Inform this Microscheduler to use the efficiency monitor used to create threads in subclasses
|
||||
*
|
||||
* @param threadEfficiencyMonitor
|
||||
*/
|
||||
public void setThreadEfficiencyMonitor(final ThreadEfficiencyMonitor threadEfficiencyMonitor) {
|
||||
this.threadEfficiencyMonitor = threadEfficiencyMonitor;
|
||||
}
|
||||
|
||||
/**
|
||||
* Walks a walker over the given list of intervals.
|
||||
*
|
||||
|
|
@ -183,6 +201,18 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
traversalEngine.printOnTraversalDone();
|
||||
}
|
||||
|
||||
/**
|
||||
* Must be called by subclasses when execute is done
|
||||
*/
|
||||
protected void executionIsDone() {
|
||||
// Print out the threading efficiency of this HMS, if state monitoring is enabled
|
||||
if ( threadEfficiencyMonitor != null ) {
|
||||
// include the master thread information
|
||||
threadEfficiencyMonitor.threadIsDone(Thread.currentThread());
|
||||
threadEfficiencyMonitor.printUsageInformation(logger);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the engine that created this microscheduler.
|
||||
* @return The engine owning this microscheduler.
|
||||
|
|
|
|||
|
|
@ -304,7 +304,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
final boolean isSingleElementCigar = nextElement == lastElement;
|
||||
final CigarOperator nextOp = nextElement.getOperator(); // next cigar operator
|
||||
final CigarOperator lastOp = lastElement.getOperator(); // last cigar operator
|
||||
final int readOffset = state.getReadOffset(); // the base offset on this read
|
||||
int readOffset = state.getReadOffset(); // the base offset on this read
|
||||
|
||||
final boolean isBeforeDeletion = nextOp == CigarOperator.DELETION;
|
||||
final boolean isAfterDeletion = lastOp == CigarOperator.DELETION;
|
||||
|
|
@ -331,6 +331,9 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
String insertedBaseString = null;
|
||||
if (nextOp == CigarOperator.I) {
|
||||
final int insertionOffset = isSingleElementCigar ? 0 : 1;
|
||||
// TODO -- someone please implement a better fix for the single element insertion CIGAR!
|
||||
if (isSingleElementCigar)
|
||||
readOffset -= (nextElement.getLength() - 1); // LIBS has passed over the insertion bases!
|
||||
insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + insertionOffset, readOffset + insertionOffset + nextElement.getLength()));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,11 @@ public class ThreadAllocation {
|
|||
*/
|
||||
private final int numIOThreads;
|
||||
|
||||
/**
|
||||
* Should we monitor thread efficiency?
|
||||
*/
|
||||
private final boolean monitorEfficiency;
|
||||
|
||||
public int getNumCPUThreads() {
|
||||
return numCPUThreads;
|
||||
}
|
||||
|
|
@ -48,11 +53,15 @@ public class ThreadAllocation {
|
|||
return numIOThreads;
|
||||
}
|
||||
|
||||
public boolean monitorThreadEfficiency() {
|
||||
return monitorEfficiency;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct the default thread allocation.
|
||||
*/
|
||||
public ThreadAllocation() {
|
||||
this(1,null,null);
|
||||
this(1, null, null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -62,7 +71,7 @@ public class ThreadAllocation {
|
|||
* @param numCPUThreads Total number of threads allocated to the traversal.
|
||||
* @param numIOThreads Total number of threads allocated exclusively to IO.
|
||||
*/
|
||||
public ThreadAllocation(final int totalThreads, final Integer numCPUThreads, final Integer numIOThreads) {
|
||||
public ThreadAllocation(final int totalThreads, final Integer numCPUThreads, final Integer numIOThreads, final boolean monitorEfficiency) {
|
||||
// If no allocation information is present, allocate all threads to CPU
|
||||
if(numCPUThreads == null && numIOThreads == null) {
|
||||
this.numCPUThreads = totalThreads;
|
||||
|
|
@ -88,6 +97,7 @@ public class ThreadAllocation {
|
|||
this.numCPUThreads = numCPUThreads;
|
||||
this.numIOThreads = numIOThreads;
|
||||
}
|
||||
}
|
||||
|
||||
this.monitorEfficiency = monitorEfficiency;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -51,7 +52,12 @@ public class AlleleBalance extends InfoFieldAnnotation {
|
|||
|
||||
|
||||
char[] BASES = {'A','C','G','T'};
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
@ -24,7 +25,14 @@ import java.util.List;
|
|||
*/
|
||||
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, final GenotypeBuilder gb) {
|
||||
public void annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final AlignmentContext stratifiedContext,
|
||||
final VariantContext vc,
|
||||
final Genotype g,
|
||||
final GenotypeBuilder gb,
|
||||
final PerReadAlleleLikelihoodMap alleleLikelihoodMap){
|
||||
Double ratio = annotateSNP(stratifiedContext, vc, g);
|
||||
if (ratio == null)
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -52,7 +53,12 @@ import java.util.Map;
|
|||
*/
|
||||
public class BaseCounts extends InfoFieldAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -21,66 +23,37 @@ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnot
|
|||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities")); }
|
||||
|
||||
protected void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if( isUsableBase(p) ) {
|
||||
if ( p.getBase() == ref )
|
||||
refQuals.add((double)p.getQual());
|
||||
else if ( alts.contains(p.getBase()) )
|
||||
altQuals.add((double)p.getQual());
|
||||
}
|
||||
}
|
||||
}
|
||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
|
||||
// TODO -- implement me; how do we pull out the correct offset from the read?
|
||||
return;
|
||||
|
||||
/*
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
||||
if ( !matchesRef && !matchesAlt )
|
||||
continue;
|
||||
|
||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
||||
protected void fillQualsFromPileup(final List<Allele> allAlleles, final int refLoc,
|
||||
final ReadBackedPileup pileup,
|
||||
final PerReadAlleleLikelihoodMap alleleLikelihoodMap,
|
||||
final List<Double> refQuals, final List<Double> altQuals){
|
||||
|
||||
if (alleleLikelihoodMap == null) {
|
||||
// use fast SNP-based version if we don't have per-read allele likelihoods
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( isUsableBase(p) ) {
|
||||
if ( matchesRef )
|
||||
if ( allAlleles.get(0).equals(Allele.create(p.getBase(),true)) ) {
|
||||
refQuals.add((double)p.getQual());
|
||||
else
|
||||
} else if ( allAlleles.contains(Allele.create(p.getBase()))) {
|
||||
altQuals.add((double)p.getQual());
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
|
||||
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
for (final PileupElement p: pileup) {
|
||||
if (indelLikelihoodMap.containsKey(p)) {
|
||||
// retrieve likelihood information corresponding to this read
|
||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||
// by design, first element in LinkedHashMap was ref allele
|
||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Map.Entry<Allele, Double> entry : el.entrySet()) {
|
||||
|
||||
if (entry.getKey().isReference())
|
||||
refLikelihood = entry.getValue();
|
||||
else {
|
||||
double like = entry.getValue();
|
||||
if (like >= altLikelihood)
|
||||
altLikelihood = like;
|
||||
}
|
||||
}
|
||||
if (refLikelihood > altLikelihood + INDEL_LIKELIHOOD_THRESH)
|
||||
refQuals.add(-10.0*refLikelihood);
|
||||
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
|
||||
altQuals.add(-10.0*altLikelihood);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (Map<Allele,Double> el : alleleLikelihoodMap.getLikelihoodMapValues()) {
|
||||
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el);
|
||||
if (a.isNoCall())
|
||||
continue; // read is non-informative
|
||||
if (a.isReference())
|
||||
refQuals.add(-10.0*(double)el.get(a));
|
||||
else if (allAlleles.contains(a))
|
||||
altQuals.add(-10.0*(double)el.get(a));
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -61,7 +62,12 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
|
|||
|
||||
private Set<String> founderIds = new HashSet<String>();
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||
if ( ! vc.hasGenotypes() )
|
||||
return null;
|
||||
|
||||
|
|
@ -73,13 +79,6 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
|
|||
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if ( ! vc.hasGenotypes() )
|
||||
return null;
|
||||
|
||||
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList(keyNames);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -24,68 +22,26 @@ public class ClippingRankSumTest extends RankSumTest {
|
|||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ClippingRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases")); }
|
||||
|
||||
protected void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
return;
|
||||
// This working implementation below needs to be tested for the UG pipeline
|
||||
/*
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( isUsableBase(p) ) {
|
||||
if ( p.getBase() == ref ) {
|
||||
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
||||
} else if ( alts.contains(p.getBase()) ) {
|
||||
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
||||
if ( !matchesRef && !matchesAlt )
|
||||
continue;
|
||||
protected void fillQualsFromPileup(final List<Allele> allAlleles,
|
||||
final int refLoc,
|
||||
final ReadBackedPileup pileup,
|
||||
final PerReadAlleleLikelihoodMap likelihoodMap, final List<Double> refQuals, final List<Double> altQuals) {
|
||||
// todo - only support non-pileup case for now, e.g. active-region based version
|
||||
if (pileup != null || likelihoodMap == null)
|
||||
return;
|
||||
|
||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||
|
||||
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||
if (a.isNoCall())
|
||||
continue; // read is non-informative
|
||||
if (a.isReference())
|
||||
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey()));
|
||||
else if (allAlleles.contains(a))
|
||||
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey()));
|
||||
|
||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
||||
if ( matchesRef )
|
||||
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(read));
|
||||
else
|
||||
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(read));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
return;
|
||||
// This working implementation below needs to be tested for the UG pipeline
|
||||
|
||||
/*
|
||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
|
||||
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
for (final PileupElement p: pileup) {
|
||||
if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
|
||||
// retrieve likelihood information corresponding to this read
|
||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||
// by design, first element in LinkedHashMap was ref allele
|
||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Allele a : el.keySet()) {
|
||||
|
||||
if (a.isReference())
|
||||
refLikelihood =el.get(a);
|
||||
else {
|
||||
double like = el.get(a);
|
||||
if (like >= altLikelihood)
|
||||
altLikelihood = like;
|
||||
}
|
||||
}
|
||||
if (refLikelihood > altLikelihood + INDEL_LIKELIHOOD_THRESH)
|
||||
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
||||
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
|
||||
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
|
|
@ -29,28 +30,30 @@ import java.util.Map;
|
|||
*/
|
||||
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||
|
||||
int depth = 0;
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() )
|
||||
depth += sample.getValue().getBasePileup().depthOfCoverage();
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%d", depth));
|
||||
return map;
|
||||
}
|
||||
if (stratifiedContexts != null) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
int depth = 0;
|
||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
||||
for ( final List<GATKSAMRecord> alleleBin : alleleBins.values() ) {
|
||||
depth += alleleBin.size();
|
||||
}
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() )
|
||||
depth += sample.getValue().getBasePileup().depthOfCoverage();
|
||||
}
|
||||
else if (perReadAlleleLikelihoodMap != null) {
|
||||
if ( perReadAlleleLikelihoodMap.size() == 0 )
|
||||
return null;
|
||||
|
||||
for ( Map.Entry<String, PerReadAlleleLikelihoodMap> sample : perReadAlleleLikelihoodMap.entrySet() )
|
||||
depth += sample.getValue().getNumberOfStoredElements();
|
||||
}
|
||||
else
|
||||
return null;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%d", depth));
|
||||
|
|
|
|||
|
|
@ -6,11 +6,13 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
|
|
@ -19,6 +21,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -34,25 +37,33 @@ import java.util.List;
|
|||
* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
|
||||
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
|
||||
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
|
||||
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
|
||||
* are unambiguously informative about the alternate allele). Because of this fact and
|
||||
* because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
|
||||
* assumptions about the underlying genotype based on it</b>; instead, the genotype likelihoods (PLs) are what
|
||||
* determine the genotype calls.
|
||||
* the AD isn't necessarily calculated exactly for indels. Only reads which are statistically favoring one allele over the other are counted.
|
||||
* Because of this fact, the sum of AD may be different than the individual sample depth, especially when there are
|
||||
* many non-informatice reads.
|
||||
* Because the AD includes reads and bases that were filtered by the Unified Genotyper and in case of indels is based on a statistical computation,
|
||||
* <b>one should not base assumptions about the underlying genotype based on it</b>;
|
||||
* instead, the genotype likelihoods (PLs) are what determine the genotype calls.
|
||||
*/
|
||||
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
||||
|
||||
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
|
||||
public void annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final AlignmentContext stratifiedContext,
|
||||
final VariantContext vc,
|
||||
final Genotype g,
|
||||
final GenotypeBuilder gb,
|
||||
final PerReadAlleleLikelihoodMap alleleLikelihoodMap) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
return;
|
||||
|
||||
if ( vc.isSNP() )
|
||||
annotateSNP(stratifiedContext, vc, gb);
|
||||
else if ( vc.isIndel() )
|
||||
annotateIndel(stratifiedContext, ref.getBase(), vc, gb);
|
||||
if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty())
|
||||
annotateWithLikelihoods(alleleLikelihoodMap, vc, gb);
|
||||
else if ( stratifiedContext != null && (vc.isSNP()))
|
||||
annotateWithPileup(stratifiedContext, vc, gb);
|
||||
}
|
||||
|
||||
private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
private void annotateWithPileup(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
|
||||
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
|
||||
for ( Allele allele : vc.getAlleles() )
|
||||
|
|
@ -73,48 +84,29 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
gb.AD(counts);
|
||||
}
|
||||
|
||||
private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
|
||||
if ( pileup == null )
|
||||
return;
|
||||
|
||||
private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
|
||||
final Allele refAllele = vc.getReference();
|
||||
|
||||
for ( final Allele allele : vc.getAlleles() ) {
|
||||
alleleCounts.put(allele, 0);
|
||||
}
|
||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||
if (a.isNoCall())
|
||||
continue; // read is non-informative
|
||||
if (!vc.getAlleles().contains(a))
|
||||
continue; // sanity check - shouldn't be needed
|
||||
alleleCounts.put(a,alleleCounts.get(a)+1);
|
||||
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( p.isBeforeInsertion() ) {
|
||||
|
||||
final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
|
||||
if ( alleleCounts.containsKey(insertion) ) {
|
||||
alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
|
||||
}
|
||||
|
||||
} else if ( p.isBeforeDeletionStart() ) {
|
||||
if ( p.getEventLength() == refAllele.length() - 1 ) {
|
||||
// this is indeed the deletion allele recorded in VC
|
||||
final Allele deletion = Allele.create(refBase);
|
||||
if ( alleleCounts.containsKey(deletion) ) {
|
||||
alleleCounts.put(deletion, alleleCounts.get(deletion)+1);
|
||||
}
|
||||
}
|
||||
} else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
|
||||
alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1);
|
||||
}
|
||||
}
|
||||
|
||||
final int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(refAllele);
|
||||
counts[0] = alleleCounts.get(vc.getReference());
|
||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||
counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
|
||||
|
||||
gb.AD(counts);
|
||||
}
|
||||
|
||||
// public String getIndelBases()
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }
|
||||
|
||||
public List<VCFFormatHeaderLine> getDescriptions() {
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -54,21 +55,30 @@ import java.util.*;
|
|||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
private static final String FS = "FS";
|
||||
private static final double MIN_PVALUE = 1E-320;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( !vc.isVariant() )
|
||||
return null;
|
||||
|
||||
int[][] table;
|
||||
|
||||
if ( vc.isSNP() )
|
||||
if (vc.isSNP() && stratifiedContexts != null) {
|
||||
table = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
|
||||
else if ( vc.isIndel() || vc.isMixed() ) {
|
||||
table = getIndelContingencyTable(stratifiedContexts);
|
||||
if (table == null)
|
||||
return null;
|
||||
}
|
||||
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
||||
// either SNP with no alignment context, or indels: per-read likelihood map needed
|
||||
table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
|
||||
}
|
||||
else
|
||||
// for non-snp variants, we need per-read likelihoods.
|
||||
// for snps, we can get same result from simple pileup
|
||||
return null;
|
||||
|
||||
if (table == null)
|
||||
return null;
|
||||
|
||||
Double pvalue = Math.max(pValueForContingencyTable(table), MIN_PVALUE);
|
||||
|
|
@ -80,22 +90,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
return map;
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if ( !vc.isVariant() )
|
||||
return null;
|
||||
|
||||
final int[][] table = getContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
|
||||
|
||||
final Double pvalue = Math.max(pValueForContingencyTable(table), MIN_PVALUE);
|
||||
if ( pvalue == null )
|
||||
return null;
|
||||
|
||||
final Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(FS, String.format("%.3f", QualityUtils.phredScaleErrorRate(pvalue)));
|
||||
return map;
|
||||
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList(FS);
|
||||
}
|
||||
|
|
@ -161,7 +155,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
table[0][1] += 1;
|
||||
table[1][1] -= 1;
|
||||
|
||||
return (table[0][0] >= 0 && table[1][1] >= 0) ? true : false;
|
||||
return (table[0][0] >= 0 && table[1][1] >= 0);
|
||||
}
|
||||
|
||||
private static boolean unrotateTable(int[][] table) {
|
||||
|
|
@ -171,7 +165,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
table[0][1] -= 1;
|
||||
table[1][1] += 1;
|
||||
|
||||
return (table[0][1] >= 0 && table[1][0] >= 0) ? true : false;
|
||||
return (table[0][1] >= 0 && table[1][0] >= 0);
|
||||
}
|
||||
|
||||
private static double computePValue(int[][] table) {
|
||||
|
|
@ -218,31 +212,31 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
* allele2 # #
|
||||
* @return a 2x2 contingency table
|
||||
*/
|
||||
private static int[][] getContingencyTable(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, Allele ref, Allele alt) {
|
||||
private static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
||||
final Allele ref, final Allele alt) {
|
||||
int[][] table = new int[2][2];
|
||||
|
||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
|
||||
for (PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
|
||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
|
||||
if ( el.getKey().isReducedRead() ) // ignore reduced reads
|
||||
continue;
|
||||
final boolean matchesRef = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(ref,true);
|
||||
final boolean matchesAlt = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(alt,true);
|
||||
|
||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
||||
final boolean matchesAlt = alt.equals(alleleBin.getKey());
|
||||
if ( !matchesRef && !matchesAlt )
|
||||
continue;
|
||||
|
||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
||||
boolean isFW = read.getReadNegativeStrandFlag();
|
||||
boolean isFW = el.getKey().getReadNegativeStrandFlag();
|
||||
|
||||
int row = matchesRef ? 0 : 1;
|
||||
int column = isFW ? 0 : 1;
|
||||
int row = matchesRef ? 0 : 1;
|
||||
int column = isFW ? 0 : 1;
|
||||
|
||||
table[row][column]++;
|
||||
}
|
||||
table[row][column]++;
|
||||
}
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
/**
|
||||
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
|
||||
* fw rc
|
||||
|
|
@ -275,69 +269,5 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
return table;
|
||||
}
|
||||
|
||||
/**
|
||||
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
|
||||
* fw rc
|
||||
* allele1 # #
|
||||
* allele2 # #
|
||||
* @return a 2x2 contingency table
|
||||
*/
|
||||
private static int[][] getIndelContingencyTable(Map<String, AlignmentContext> stratifiedContexts) {
|
||||
final double INDEL_LIKELIHOOD_THRESH = 0.3;
|
||||
final HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
|
||||
if (indelLikelihoodMap == null)
|
||||
return null;
|
||||
|
||||
int[][] table = new int[2][2];
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
final AlignmentContext context = sample.getValue();
|
||||
if ( context == null )
|
||||
continue;
|
||||
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( ! RankSumTest.isUsableBase(p, true) || p.getRead().isReducedRead() ) // ignore reduced reads
|
||||
continue;
|
||||
if ( indelLikelihoodMap.containsKey(p) ) {
|
||||
// to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element.
|
||||
// A pileup element then has a list of pairs of form (Allele, likelihood of this allele).
|
||||
// To classify a pileup element as Ref or Alt, we look at the likelihood of corresponding alleles.
|
||||
// If likelihood of ref allele > highest likelihood of all alt alleles + epsilon, then this pileup element is "ref"
|
||||
// otherwise if highest alt allele likelihood is > ref likelihood + epsilon, then this pileup element it "alt"
|
||||
// retrieve likelihood information corresponding to this read
|
||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||
// by design, first element in LinkedHashMap was ref allele
|
||||
boolean isFW = !p.getRead().getReadNegativeStrandFlag();
|
||||
|
||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Map.Entry<Allele,Double> entry : el.entrySet()) {
|
||||
|
||||
if (entry.getKey().isReference())
|
||||
refLikelihood = entry.getValue();
|
||||
else {
|
||||
double like = entry.getValue();
|
||||
if (like >= altLikelihood)
|
||||
altLikelihood = like;
|
||||
}
|
||||
}
|
||||
|
||||
boolean matchesRef = (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH));
|
||||
boolean matchesAlt = (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH));
|
||||
if ( matchesRef || matchesAlt ) {
|
||||
int row = matchesRef ? 0 : 1;
|
||||
int column = isFW ? 0 : 1;
|
||||
|
||||
table[row][column]++;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -25,7 +26,12 @@ import java.util.Map;
|
|||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
double content = computeGCContent(ref);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", content));
|
||||
|
|
|
|||
|
|
@ -28,10 +28,12 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
|
@ -55,17 +57,31 @@ import java.util.*;
|
|||
* are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
|
||||
* Note that the Haplotype Score is only calculated for sites with read coverage.
|
||||
*/
|
||||
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
private final static boolean DEBUG = false;
|
||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
|
||||
private final static char REGEXP_WILDCARD = '.';
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if (vc.isSNP() && stratifiedContexts != null)
|
||||
return annotatePileup(ref, stratifiedContexts, vc);
|
||||
else if (stratifiedPerReadAlleleLikelihoodMap != null && vc.isVariant())
|
||||
return annotateWithLikelihoods(stratifiedPerReadAlleleLikelihoodMap, vc);
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!vc.isSNP() && !vc.isIndel() && !vc.isMixed())
|
||||
private Map<String, Object> annotatePileup(final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc) {
|
||||
|
||||
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
|
||||
return null;
|
||||
|
||||
final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values());
|
||||
|
|
@ -86,14 +102,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName());
|
||||
if (thisContext != null) {
|
||||
final ReadBackedPileup thisPileup = thisContext.getBasePileup();
|
||||
if (vc.isSNP())
|
||||
scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
else if (vc.isIndel() || vc.isMixed()) {
|
||||
Double d = scoreIndelsAgainstHaplotypes(thisPileup);
|
||||
if (d == null)
|
||||
return null;
|
||||
scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
}
|
||||
scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -104,6 +113,31 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
return map;
|
||||
}
|
||||
|
||||
private Map<String, Object> annotateWithLikelihoods(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
||||
final VariantContext vc) {
|
||||
|
||||
final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage();
|
||||
for (final Genotype genotype : vc.getGenotypes()) {
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
if (perReadAlleleLikelihoodMap == null)
|
||||
continue;
|
||||
|
||||
Double d = scoreIndelsAgainstHaplotypes(perReadAlleleLikelihoodMap);
|
||||
if (d == null)
|
||||
continue;
|
||||
scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
}
|
||||
|
||||
// if (scoreRA.observationCount() == 0)
|
||||
// return null;
|
||||
|
||||
// annotate the score in the info field
|
||||
final Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.4f", scoreRA.mean()));
|
||||
return map;
|
||||
|
||||
}
|
||||
|
||||
private static class HaplotypeComparator implements Comparator<Haplotype>, Serializable {
|
||||
|
||||
public int compare(Haplotype a, Haplotype b) {
|
||||
|
|
@ -178,7 +212,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
|
||||
private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) {
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
int readOffsetFromPileup = p.getOffset();
|
||||
|
||||
final byte[] haplotypeBases = new byte[contextSize];
|
||||
Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD);
|
||||
|
|
@ -190,7 +223,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
byte[] readQuals = read.getBaseQualities();
|
||||
readQuals = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string
|
||||
|
||||
readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), p, read.getAlignmentStart(), locus);
|
||||
final int readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), p, read.getAlignmentStart(), locus);
|
||||
final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;
|
||||
|
||||
for (int i = 0; i < contextSize; i++) {
|
||||
|
|
@ -347,31 +380,26 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
}
|
||||
|
||||
|
||||
private Double scoreIndelsAgainstHaplotypes(final ReadBackedPileup pileup) {
|
||||
private Double scoreIndelsAgainstHaplotypes(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
|
||||
final ArrayList<double[]> haplotypeScores = new ArrayList<double[]>();
|
||||
|
||||
final HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
|
||||
if (indelLikelihoodMap == null)
|
||||
if (perReadAlleleLikelihoodMap.isEmpty())
|
||||
return null;
|
||||
|
||||
for (final PileupElement p : pileup) {
|
||||
if (indelLikelihoodMap.containsKey(p)) {
|
||||
// retrieve likelihood information corresponding to this read
|
||||
LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p);
|
||||
for (Map<Allele,Double> el : perReadAlleleLikelihoodMap.getLikelihoodMapValues()) {
|
||||
|
||||
// Score all the reads in the pileup, even the filtered ones
|
||||
final double[] scores = new double[el.size()];
|
||||
int i = 0;
|
||||
for (Map.Entry<Allele, Double> a : el.entrySet()) {
|
||||
scores[i++] = -a.getValue();
|
||||
if (DEBUG) {
|
||||
System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
|
||||
}
|
||||
// retrieve likelihood information corresponding to this read
|
||||
// Score all the reads in the pileup, even the filtered ones
|
||||
final double[] scores = new double[el.size()];
|
||||
int i = 0;
|
||||
for (Map.Entry<Allele, Double> a : el.entrySet()) {
|
||||
scores[i++] = -a.getValue();
|
||||
if (DEBUG) {
|
||||
System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
|
||||
}
|
||||
|
||||
haplotypeScores.add(scores);
|
||||
}
|
||||
|
||||
haplotypeScores.add(scores);
|
||||
}
|
||||
|
||||
// indel likelihoods are strict log-probs, not phred scored
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -29,7 +30,12 @@ public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgress
|
|||
private static final int MIN_GENOTYPE_QUALITY = 10;
|
||||
private static final int MIN_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -22,7 +23,12 @@ public class HomopolymerRun extends InfoFieldAnnotation {
|
|||
|
||||
private boolean ANNOTATE_INDELS = true;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
|
||||
if ( !vc.isBiallelic() )
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -33,17 +34,18 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
|||
private static final int MIN_SAMPLES = 10;
|
||||
private Set<String> founderIds;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||
//If available, get the founder IDs and cache them. the IC will only be computed on founders then.
|
||||
if(founderIds == null)
|
||||
if(founderIds == null && walker != null)
|
||||
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
|
||||
return calculateIC(vc);
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
return calculateIC(vc);
|
||||
}
|
||||
|
||||
private Map<String, Object> calculateIC(final VariantContext vc) {
|
||||
final GenotypesContext genotypes = (founderIds == null || founderIds.isEmpty()) ? vc.getGenotypes() : vc.getGenotypes(founderIds);
|
||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.IndelUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -18,9 +19,14 @@ import java.util.*;
|
|||
*/
|
||||
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
|
||||
int run;
|
||||
int run;
|
||||
if (vc.isMixed()) {
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%s", "MIXED"));
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -21,7 +22,12 @@ import java.util.Map;
|
|||
*/
|
||||
public class LowMQ extends InfoFieldAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAn
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -30,14 +31,18 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
|||
|
||||
private MendelianViolation mendelianViolation = null;
|
||||
private Set<Trio> trios;
|
||||
|
||||
private class Trio {
|
||||
String motherId;
|
||||
String fatherId;
|
||||
String childId;
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( mendelianViolation == null ) {
|
||||
if (checkAndSetSamples(((Walker) walker).getSampleDB())) {
|
||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
|
|
|
|||
|
|
@ -2,11 +2,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
|
|
@ -23,60 +25,36 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
|
|||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities")); }
|
||||
|
||||
protected void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( isUsableBase(p) ) {
|
||||
if ( p.getBase() == ref ) {
|
||||
refQuals.add((double)p.getMappingQual());
|
||||
} else if ( alts.contains(p.getBase()) ) {
|
||||
altQuals.add((double)p.getMappingQual());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
protected void fillQualsFromPileup(final List<Allele> allAlleles,
|
||||
final int refLoc,
|
||||
final ReadBackedPileup pileup,
|
||||
final PerReadAlleleLikelihoodMap likelihoodMap,
|
||||
final List<Double> refQuals, final List<Double> altQuals) {
|
||||
|
||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
||||
if ( !matchesRef && !matchesAlt )
|
||||
continue;
|
||||
|
||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
||||
if ( matchesRef )
|
||||
refQuals.add((double)read.getMappingQuality());
|
||||
else
|
||||
altQuals.add((double)read.getMappingQuality());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
|
||||
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
for (final PileupElement p: pileup) {
|
||||
if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
|
||||
// retrieve likelihood information corresponding to this read
|
||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||
// by design, first element in LinkedHashMap was ref allele
|
||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
||||
|
||||
for (Map.Entry<Allele,Double> a : el.entrySet()) {
|
||||
|
||||
if (a.getKey().isReference())
|
||||
refLikelihood = a.getValue();
|
||||
else {
|
||||
double like = a.getValue();
|
||||
if (like >= altLikelihood)
|
||||
altLikelihood = like;
|
||||
if (pileup != null && likelihoodMap == null) {
|
||||
// no per-read likelihoods available:
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( isUsableBase(p) ) {
|
||||
if ( allAlleles.get(0).equals(Allele.create(p.getBase(), true)) ) {
|
||||
refQuals.add((double)p.getMappingQual());
|
||||
} else if ( allAlleles.contains(Allele.create(p.getBase()))) {
|
||||
altQuals.add((double)p.getMappingQual());
|
||||
}
|
||||
}
|
||||
if (refLikelihood > altLikelihood + INDEL_LIKELIHOOD_THRESH)
|
||||
refQuals.add((double)p.getMappingQual());
|
||||
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
|
||||
altQuals.add((double)p.getMappingQual());
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||
if (a.isNoCall())
|
||||
continue; // read is non-informative
|
||||
if (a.isReference())
|
||||
refQuals.add((double)el.getKey().getMappingQuality());
|
||||
else if (allAlleles.contains(a))
|
||||
altQuals.add((double)el.getKey().getMappingQuality());
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -3,14 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -22,9 +26,25 @@ import java.util.Map;
|
|||
/**
|
||||
* Total count across all samples of mapping quality zero reads
|
||||
*/
|
||||
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ((vc.isSNP() || !vc.isVariant()) && stratifiedContexts != null)
|
||||
return annotatePileup(ref, stratifiedContexts, vc);
|
||||
else if (stratifiedPerReadAlleleLikelihoodMap != null && vc.isVariant())
|
||||
return annotateWithLikelihoods(stratifiedPerReadAlleleLikelihoodMap, vc);
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
private Map<String, Object> annotatePileup(final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
@ -42,6 +62,25 @@ public class MappingQualityZero extends InfoFieldAnnotation implements StandardA
|
|||
return map;
|
||||
}
|
||||
|
||||
private Map<String, Object> annotateWithLikelihoods(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
||||
final VariantContext vc) {
|
||||
if (stratifiedPerReadAlleleLikelihoodMap == null)
|
||||
return null;
|
||||
|
||||
int mq0 = 0;
|
||||
for ( PerReadAlleleLikelihoodMap likelihoodMap : stratifiedPerReadAlleleLikelihoodMap.values() ) {
|
||||
for (GATKSAMRecord read : likelihoodMap.getLikelihoodReadMap().keySet()) {
|
||||
|
||||
if (read.getMappingQuality() == 0 )
|
||||
mq0++;
|
||||
}
|
||||
}
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%d", mq0));
|
||||
return map;
|
||||
}
|
||||
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
@ -46,14 +47,19 @@ import java.util.List;
|
|||
* Count for each sample of mapping quality zero reads
|
||||
*/
|
||||
public class MappingQualityZeroBySample extends GenotypeAnnotation {
|
||||
public void annotate(RefMetaDataTracker tracker,
|
||||
AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext context,
|
||||
VariantContext vc, Genotype g, GenotypeBuilder gb) {
|
||||
public void annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final AlignmentContext stratifiedContext,
|
||||
final VariantContext vc,
|
||||
final Genotype g,
|
||||
final GenotypeBuilder gb,
|
||||
final PerReadAlleleLikelihoodMap alleleLikelihoodMap){
|
||||
if ( g == null || !g.isCalled() )
|
||||
return;
|
||||
|
||||
int mq0 = 0;
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
final ReadBackedPileup pileup = stratifiedContext.getBasePileup();
|
||||
for (PileupElement p : pileup ) {
|
||||
if ( p.getMappingQual() == 0 )
|
||||
mq0++;
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -22,7 +23,12 @@ import java.util.Map;
|
|||
*/
|
||||
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -20,7 +21,12 @@ import java.util.Map;
|
|||
* The number of N bases, counting only SOLiD data
|
||||
*/
|
||||
public class NBaseCount extends InfoFieldAnnotation {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -28,8 +29,13 @@ import java.util.Map;
|
|||
*/
|
||||
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( !vc.hasLog10PError() || stratifiedContexts.size() == 0 )
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||
if ( !vc.hasLog10PError() )
|
||||
return null;
|
||||
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
|
|
@ -44,11 +50,20 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
if ( !genotype.isHet() && !genotype.isHomVar() )
|
||||
continue;
|
||||
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null )
|
||||
continue;
|
||||
if (stratifiedContexts!= null) {
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null )
|
||||
continue;
|
||||
depth += context.getBasePileup().depthOfCoverage();
|
||||
|
||||
depth += context.getBasePileup().depthOfCoverage();
|
||||
}
|
||||
else if (perReadAlleleLikelihoodMap != null) {
|
||||
PerReadAlleleLikelihoodMap perReadAlleleLikelihoods = perReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
if (perReadAlleleLikelihoods == null || perReadAlleleLikelihoods.isEmpty())
|
||||
continue;
|
||||
|
||||
depth += perReadAlleleLikelihoods.getNumberOfStoredElements();
|
||||
}
|
||||
}
|
||||
|
||||
if ( depth == 0 )
|
||||
|
|
@ -67,39 +82,5 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
int depth = 0;
|
||||
|
||||
for ( final Genotype genotype : genotypes ) {
|
||||
|
||||
// we care only about variant calls with likelihoods
|
||||
if ( !genotype.isHet() && !genotype.isHomVar() )
|
||||
continue;
|
||||
|
||||
final Map<Allele, List<GATKSAMRecord>> alleleBins = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( alleleBins == null )
|
||||
continue;
|
||||
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
|
||||
depth += alleleBin.getValue().size();
|
||||
}
|
||||
}
|
||||
|
||||
if ( depth == 0 )
|
||||
return null;
|
||||
|
||||
double QD = -10.0 * vc.getLog10PError() / (double)depth;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", QD));
|
||||
return map;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
|
|
@ -18,10 +19,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -29,25 +27,48 @@ import java.util.Map;
|
|||
*/
|
||||
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||
int totalSize = 0, index = 0;
|
||||
int qualities[];
|
||||
if (stratifiedContexts != null) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
int totalSize = 0;
|
||||
for ( AlignmentContext context : stratifiedContexts.values() )
|
||||
totalSize += context.size();
|
||||
for ( AlignmentContext context : stratifiedContexts.values() )
|
||||
totalSize += context.size();
|
||||
|
||||
final int[] qualities = new int[totalSize];
|
||||
int index = 0;
|
||||
qualities = new int[totalSize];
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
AlignmentContext context = sample.getValue();
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for (PileupElement p : pileup ) {
|
||||
if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
|
||||
qualities[index++] = p.getMappingQual();
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
AlignmentContext context = sample.getValue();
|
||||
for (PileupElement p : context.getBasePileup() )
|
||||
index = fillMappingQualitiesFromPileupAndUpdateIndex(p.getRead(), index, qualities);
|
||||
}
|
||||
}
|
||||
else if (perReadAlleleLikelihoodMap != null) {
|
||||
if ( perReadAlleleLikelihoodMap.size() == 0 )
|
||||
return null;
|
||||
|
||||
for ( PerReadAlleleLikelihoodMap perReadLikelihoods : perReadAlleleLikelihoodMap.values() )
|
||||
totalSize += perReadLikelihoods.size();
|
||||
|
||||
qualities = new int[totalSize];
|
||||
for ( PerReadAlleleLikelihoodMap perReadLikelihoods : perReadAlleleLikelihoodMap.values() ) {
|
||||
for (GATKSAMRecord read : perReadLikelihoods.getStoredElements())
|
||||
index = fillMappingQualitiesFromPileupAndUpdateIndex(read, index, qualities);
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
return null;
|
||||
|
||||
|
||||
|
||||
double rms = MathUtils.rms(qualities);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
|
|
@ -55,32 +76,12 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
|
|||
return map;
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
private static int fillMappingQualitiesFromPileupAndUpdateIndex(final GATKSAMRecord read, final int inputIdx, final int[] qualities) {
|
||||
int outputIdx = inputIdx;
|
||||
if ( read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
|
||||
qualities[outputIdx++] = read.getMappingQuality();
|
||||
|
||||
int depth = 0;
|
||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
|
||||
depth += alleleBin.getValue().size();
|
||||
}
|
||||
}
|
||||
|
||||
final int[] qualities = new int[depth];
|
||||
int index = 0;
|
||||
|
||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
||||
for ( final List<GATKSAMRecord> reads : alleleBins.values() ) {
|
||||
for ( final GATKSAMRecord read : reads ) {
|
||||
if ( read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
|
||||
qualities[index++] = read.getMappingQuality();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", MathUtils.rms(qualities)));
|
||||
return map;
|
||||
return outputIdx;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY); }
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -28,12 +29,15 @@ import java.util.Map;
|
|||
* Abstract root for all RankSum based annotations
|
||||
*/
|
||||
public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation {
|
||||
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
|
||||
static final boolean DEBUG = false;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if (stratifiedContexts.size() == 0)
|
||||
return null;
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
// either stratifiedContexts or stratifiedPerReadAlleleLikelihoodMap has to be non-null
|
||||
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
if (genotypes == null || genotypes.size() == 0)
|
||||
|
|
@ -42,37 +46,28 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
|
|||
final ArrayList<Double> refQuals = new ArrayList<Double>();
|
||||
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
||||
|
||||
if ( vc.isSNP() ) {
|
||||
final List<Byte> altAlleles = new ArrayList<Byte>();
|
||||
for ( final Allele a : vc.getAlternateAlleles() )
|
||||
altAlleles.add(a.getBases()[0]);
|
||||
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
||||
PerReadAlleleLikelihoodMap indelLikelihoodMap = null;
|
||||
ReadBackedPileup pileup = null;
|
||||
|
||||
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
||||
|
||||
if (stratifiedContexts != null) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null )
|
||||
continue;
|
||||
|
||||
fillQualsFromPileup(ref.getBase(), altAlleles, context.getBasePileup(), refQuals, altQuals);
|
||||
if ( context != null )
|
||||
pileup = context.getBasePileup();
|
||||
}
|
||||
} else if ( vc.isIndel() || vc.isMixed() ) {
|
||||
if (stratifiedPerReadAlleleLikelihoodMap != null )
|
||||
indelLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
|
||||
for (final Genotype genotype : genotypes.iterateInSampleNameOrder()) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if (context == null) {
|
||||
continue;
|
||||
}
|
||||
if (indelLikelihoodMap != null && indelLikelihoodMap.isEmpty())
|
||||
indelLikelihoodMap = null;
|
||||
// treat an empty likelihood map as a null reference - will simplify contract with fillQualsFromPileup
|
||||
if (indelLikelihoodMap == null && pileup == null)
|
||||
continue;
|
||||
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
if (pileup == null)
|
||||
continue;
|
||||
|
||||
if (IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap() == null ||
|
||||
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().size() == 0)
|
||||
return null;
|
||||
|
||||
fillIndelQualsFromPileup(pileup, refQuals, altQuals);
|
||||
}
|
||||
} else
|
||||
fillQualsFromPileup(vc.getAlleles(), vc.getStart(), pileup, indelLikelihoodMap, refQuals, altQuals );
|
||||
}
|
||||
if (refQuals.isEmpty() && altQuals.isEmpty())
|
||||
return null;
|
||||
|
||||
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
|
||||
|
|
@ -103,50 +98,12 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
|
|||
return map;
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if (stratifiedContexts.size() == 0)
|
||||
return null;
|
||||
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
if (genotypes == null || genotypes.size() == 0)
|
||||
return null;
|
||||
|
||||
final ArrayList<Double> refQuals = new ArrayList<Double>();
|
||||
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
||||
|
||||
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
||||
final Map<Allele, List<GATKSAMRecord>> context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null )
|
||||
continue;
|
||||
|
||||
fillQualsFromPileup(vc.getReference(), vc.getAlternateAlleles(), vc.getStart(), context, refQuals, altQuals);
|
||||
}
|
||||
|
||||
if ( refQuals.size() == 0 || altQuals.size() == 0 )
|
||||
return null;
|
||||
|
||||
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
|
||||
for (final Double qual : altQuals) {
|
||||
mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
|
||||
}
|
||||
for (final Double qual : refQuals) {
|
||||
mannWhitneyU.add(qual, MannWhitneyU.USet.SET2);
|
||||
}
|
||||
|
||||
// we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
|
||||
final Pair<Double, Double> testResults = mannWhitneyU.runOneSidedTest(MannWhitneyU.USet.SET1);
|
||||
|
||||
final Map<String, Object> map = new HashMap<String, Object>();
|
||||
if (!Double.isNaN(testResults.first))
|
||||
map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
|
||||
return map;
|
||||
}
|
||||
|
||||
protected abstract void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals);
|
||||
|
||||
protected abstract void fillQualsFromPileup(final byte ref, final List<Byte> alts, final ReadBackedPileup pileup, final List<Double> refQuals, final List<Double> altQuals);
|
||||
|
||||
protected abstract void fillIndelQualsFromPileup(final ReadBackedPileup pileup, final List<Double> refQuals, final List<Double> altQuals);
|
||||
protected abstract void fillQualsFromPileup(final List<Allele> alleles,
|
||||
final int refLoc,
|
||||
final ReadBackedPileup readBackedPileup,
|
||||
final PerReadAlleleLikelihoodMap alleleLikelihoodMap,
|
||||
final List<Double> refQuals,
|
||||
final List<Double> altQuals);
|
||||
|
||||
/**
|
||||
* Can the base in this pileup element be used in comparative tests between ref / alt bases?
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -32,98 +32,64 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
|
|||
return Arrays.asList(new VCFInfoHeaderLine("ReadPosRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"));
|
||||
}
|
||||
|
||||
protected void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
for (final PileupElement p : pileup) {
|
||||
if (isUsableBase(p)) {
|
||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
|
||||
final int numAlignedBases = AlignmentUtils.getNumAlignedBases(p.getRead());
|
||||
if (readPos > numAlignedBases / 2)
|
||||
readPos = numAlignedBases - (readPos + 1);
|
||||
protected void fillQualsFromPileup(final List<Allele> allAlleles,
|
||||
final int refLoc,
|
||||
final ReadBackedPileup pileup,
|
||||
final PerReadAlleleLikelihoodMap alleleLikelihoodMap,
|
||||
final List<Double> refQuals, final List<Double> altQuals) {
|
||||
|
||||
if (alleleLikelihoodMap == null) {
|
||||
// use fast SNP-based version if we don't have per-read allele likelihoods
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( isUsableBase(p) ) {
|
||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
|
||||
|
||||
if ( p.getBase() == ref )
|
||||
refQuals.add((double) readPos);
|
||||
else if ( alts.contains(p.getBase()) )
|
||||
altQuals.add((double) readPos);
|
||||
}
|
||||
}
|
||||
}
|
||||
readPos = getFinalReadPosition(p.getRead(),readPos);
|
||||
|
||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
||||
if ( !matchesRef && !matchesAlt )
|
||||
continue;
|
||||
|
||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
||||
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true );
|
||||
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED )
|
||||
continue;
|
||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, false, 0, 0 );
|
||||
|
||||
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
|
||||
if (readPos > numAlignedBases / 2)
|
||||
readPos = numAlignedBases - (readPos + 1);
|
||||
|
||||
if ( matchesRef )
|
||||
refQuals.add((double) readPos);
|
||||
else
|
||||
altQuals.add((double) readPos);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele
|
||||
// to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element.
|
||||
// A pileup element then has a list of pairs of form (Allele, likelihood of this allele).
|
||||
// To classify a pileup element as Ref or Alt, we look at the likelihood of corresponding alleles.
|
||||
// If likelihood of ref allele > highest likelihood of all alt alleles + epsilon, then this pielup element is "ref"
|
||||
// otherwise if highest alt allele likelihood is > ref likelihood + epsilon, then this pileup element it "alt"
|
||||
final HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
for (final PileupElement p : pileup) {
|
||||
if (indelLikelihoodMap.containsKey(p)) {
|
||||
LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read
|
||||
double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele
|
||||
|
||||
for (Map.Entry<Allele,Double> a : el.entrySet()) {
|
||||
if (a.getKey().isReference())
|
||||
refLikelihood = a.getValue();
|
||||
else {
|
||||
double like = a.getValue();
|
||||
if (like >= altLikelihood)
|
||||
altLikelihood = like;
|
||||
if ( allAlleles.get(0).equals(Allele.create(p.getBase(), true)) ) {
|
||||
refQuals.add((double)readPos);
|
||||
} else if ( allAlleles.contains(Allele.create(p.getBase()))) {
|
||||
altQuals.add((double)readPos);
|
||||
}
|
||||
}
|
||||
|
||||
int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset());
|
||||
final int numAlignedBases = getNumAlignedBases(p.getRead());
|
||||
|
||||
if (readPos > numAlignedBases / 2) {
|
||||
readPos = numAlignedBases - (readPos + 1);
|
||||
}
|
||||
//if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases);
|
||||
|
||||
|
||||
// if event is beyond span of read just return and don't consider this element. This can happen, for example, with reads
|
||||
// where soft clipping still left strings of low quality bases but these are later removed by indel-specific clipping.
|
||||
// if (readPos < -1)
|
||||
// return;
|
||||
if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) {
|
||||
refQuals.add((double) readPos);
|
||||
//if (DEBUG) System.out.format("REF like: %4.1f, pos: %d\n",refLikelihood,readPos);
|
||||
} else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) {
|
||||
altQuals.add((double) readPos);
|
||||
//if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos);
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||
final GATKSAMRecord read = el.getKey();
|
||||
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true );
|
||||
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED )
|
||||
continue;
|
||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, false, 0, 0 );
|
||||
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
|
||||
if (readPos > numAlignedBases / 2)
|
||||
readPos = numAlignedBases - (readPos + 1);
|
||||
|
||||
// int readPos = getOffsetFromClippedReadStart(el.getKey(), el.getKey().getOffset());
|
||||
// readPos = getFinalReadPosition(el.getKey().getRead(),readPos);
|
||||
|
||||
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||
if (a.isNoCall())
|
||||
continue; // read is non-informative
|
||||
if (a.isReference())
|
||||
refQuals.add((double)readPos);
|
||||
else if (allAlleles.contains(a))
|
||||
altQuals.add((double)readPos);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int getFinalReadPosition(GATKSAMRecord read, int initialReadPosition) {
|
||||
final int numAlignedBases = getNumAlignedBases(read);
|
||||
|
||||
int readPos = initialReadPosition;
|
||||
if (initialReadPosition > numAlignedBases / 2) {
|
||||
readPos = numAlignedBases - (initialReadPosition + 1);
|
||||
}
|
||||
return readPos;
|
||||
|
||||
}
|
||||
int getNumClippedBasesAtStart(SAMRecord read) {
|
||||
// compute total number of clipped bases (soft or hard clipped)
|
||||
// check for hard clips (never consider these bases):
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -46,7 +47,12 @@ import java.util.Map;
|
|||
*/
|
||||
public class SampleList extends InfoFieldAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( vc.isMonomorphicInSamples() || !vc.hasGenotypes() )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -225,7 +226,12 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
|||
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||
|
||||
// Get only SnpEff records that start at this locus, not merely span it:
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -22,7 +23,12 @@ import java.util.Map;
|
|||
*/
|
||||
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -47,7 +48,12 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
|
|||
private static final String STR_PRESENT = "STR";
|
||||
private static final String REPEAT_UNIT_KEY = "RU";
|
||||
private static final String REPEATS_PER_ALLELE_KEY = "RPA";
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( !vc.isIndel())
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -28,7 +29,12 @@ public class TechnologyComposition extends InfoFieldAnnotation implements Experi
|
|||
private String n454 ="Num454";
|
||||
private String nSolid = "NumSOLiD";
|
||||
private String nOther = "NumOther";
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
@ -28,7 +29,12 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
|
|||
private Set<Sample> trios = null;
|
||||
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( trios == null ) {
|
||||
if ( walker instanceof VariantAnnotator ) {
|
||||
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -178,7 +179,18 @@ public class VariantAnnotatorEngine {
|
|||
this.requireStrictAlleleMatch = requireStrictAlleleMatch;
|
||||
}
|
||||
|
||||
public VariantContext annotateContext(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public VariantContext annotateContext(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
VariantContext vc) {
|
||||
return annotateContext(tracker, ref, stratifiedContexts, vc, null);
|
||||
}
|
||||
|
||||
public VariantContext annotateContext(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
VariantContext vc,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
||||
|
||||
// annotate db occurrences
|
||||
|
|
@ -189,7 +201,7 @@ public class VariantAnnotatorEngine {
|
|||
|
||||
// go through all the requested info annotationTypes
|
||||
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||
Map<String, Object> annotationsFromCurrentType = annotationType.annotate(tracker, walker, ref, stratifiedContexts, vc);
|
||||
Map<String, Object> annotationsFromCurrentType = annotationType.annotate(tracker, walker, ref, stratifiedContexts, vc, perReadAlleleLikelihoodMap);
|
||||
if ( annotationsFromCurrentType != null )
|
||||
infoAnnotations.putAll(annotationsFromCurrentType);
|
||||
}
|
||||
|
|
@ -198,22 +210,25 @@ public class VariantAnnotatorEngine {
|
|||
VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations);
|
||||
|
||||
// annotate genotypes, creating another new VC in the process
|
||||
return builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc)).make();
|
||||
return builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc, perReadAlleleLikelihoodMap)).make();
|
||||
}
|
||||
|
||||
public VariantContext annotateContext(final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
public VariantContext annotateContext(final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap, VariantContext vc) {
|
||||
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
||||
|
||||
// go through all the requested info annotationTypes
|
||||
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||
Map<String, Object> annotationsFromCurrentType = ((ActiveRegionBasedAnnotation)annotationType).annotate(stratifiedContexts, vc);
|
||||
Map<String, Object> annotationsFromCurrentType = ((ActiveRegionBasedAnnotation)annotationType).annotate(perReadAlleleLikelihoodMap, vc);
|
||||
if ( annotationsFromCurrentType != null ) {
|
||||
infoAnnotations.putAll(annotationsFromCurrentType);
|
||||
}
|
||||
}
|
||||
|
||||
// generate a new annotated VC
|
||||
return new VariantContextBuilder(vc).attributes(infoAnnotations).make();
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations);
|
||||
|
||||
// annotate genotypes, creating another new VC in the process
|
||||
return builder.genotypes(annotateGenotypes(null, null, null, vc, perReadAlleleLikelihoodMap)).make();
|
||||
}
|
||||
|
||||
private VariantContext annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map<String, Object> infoAnnotations) {
|
||||
|
|
@ -266,20 +281,30 @@ public class VariantAnnotatorEngine {
|
|||
}
|
||||
}
|
||||
|
||||
private GenotypesContext annotateGenotypes(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
||||
private GenotypesContext annotateGenotypes(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref, final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( requestedGenotypeAnnotations.isEmpty() )
|
||||
return vc.getGenotypes();
|
||||
|
||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
AlignmentContext context = null;
|
||||
PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = null;
|
||||
if (stratifiedContexts != null)
|
||||
context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if (stratifiedPerReadAlleleLikelihoodMap != null)
|
||||
perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
|
||||
if ( context == null ) {
|
||||
if ( context == null && perReadAlleleLikelihoodMap == null) {
|
||||
// no likelihoods nor pileup available: just move on to next sample
|
||||
genotypes.add(genotype);
|
||||
} else {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(genotype);
|
||||
for ( final GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
annotation.annotate(tracker, walker, ref, context, vc, genotype, gb);
|
||||
annotation.annotate(tracker, walker, ref, context, vc, genotype, gb, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
|
@ -10,8 +11,8 @@ import java.util.Map;
|
|||
|
||||
// TODO -- make this an abstract class when we move away from InfoFieldAnnotation
|
||||
public interface ActiveRegionBasedAnnotation extends AnnotationType {
|
||||
// return annotations for the given contexts split by sample and then allele
|
||||
public abstract Map<String, Object> annotate(final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, final VariantContext vc);
|
||||
// return annotations for the given contexts split by sample and then read likelihood
|
||||
public abstract Map<String, Object> annotate(final Map<String,PerReadAlleleLikelihoodMap> stratifiedContexts, final VariantContext vc);
|
||||
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
public abstract List<VCFInfoHeaderLine> getDescriptions();
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
|
|
@ -13,9 +14,14 @@ import java.util.List;
|
|||
public abstract class GenotypeAnnotation extends VariantAnnotatorAnnotation {
|
||||
|
||||
// return annotations for the given contexts/genotype split by sample
|
||||
public abstract void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker,
|
||||
ReferenceContext ref, AlignmentContext stratifiedContext,
|
||||
VariantContext vc, Genotype g, GenotypeBuilder gb );
|
||||
public abstract void annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final AlignmentContext stratifiedContext,
|
||||
final VariantContext vc,
|
||||
final Genotype g,
|
||||
final GenotypeBuilder gb,
|
||||
final PerReadAlleleLikelihoodMap alleleLikelihoodMap);
|
||||
|
||||
// return the descriptions used for the VCF FORMAT meta field
|
||||
public abstract List<VCFFormatHeaderLine> getDescriptions();
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
|
|
@ -11,8 +12,25 @@ import java.util.Map;
|
|||
|
||||
public abstract class InfoFieldAnnotation extends VariantAnnotatorAnnotation {
|
||||
// return annotations for the given contexts split by sample
|
||||
public abstract Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker,
|
||||
ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc);
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc) {
|
||||
return annotate(tracker, walker, ref, stratifiedContexts, vc, null);
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap, VariantContext vc) {
|
||||
return annotate(null, null, null, null, vc, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
|
||||
public abstract Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap);
|
||||
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
public abstract List<VCFInfoHeaderLine> getDescriptions();
|
||||
|
|
|
|||
|
|
@ -103,7 +103,8 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final List<Allele> allAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser);
|
||||
final GenomeLocParser locParser,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap);
|
||||
|
||||
|
||||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||
|
|
@ -115,4 +116,5 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
|
||||
return count;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -48,24 +48,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
|
||||
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
||||
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
||||
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
|
||||
return new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
|
||||
}
|
||||
};
|
||||
|
||||
private LinkedHashMap<Allele, Haplotype> haplotypeMap;
|
||||
|
||||
// gdebug removeme
|
||||
// todo -cleanup
|
||||
private GenomeLoc lastSiteVisited;
|
||||
private List<Allele> alleleList = new ArrayList<Allele>();
|
||||
|
||||
static {
|
||||
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
||||
}
|
||||
|
||||
|
||||
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC, logger);
|
||||
|
|
@ -93,16 +80,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final List<Allele> allAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser) {
|
||||
final GenomeLocParser locParser,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
|
||||
GenomeLoc loc = ref.getLocus();
|
||||
// if (!ref.getLocus().equals(lastSiteVisited)) {
|
||||
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
||||
// starting a new site: clear allele list
|
||||
lastSiteVisited = ref.getLocus();
|
||||
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
||||
haplotypeMap.clear();
|
||||
|
||||
perReadAlleleLikelihoodMap.clear(); // clean mapping sample-> per read, per allele likelihoods
|
||||
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
|
|
@ -130,10 +116,14 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
|
||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||
|
||||
if (!perReadAlleleLikelihoodMap.containsKey(sample.getKey())){
|
||||
// no likelihoods have been computed for this sample at this site
|
||||
perReadAlleleLikelihoodMap.put(sample.getKey(), new PerReadAlleleLikelihoodMap());
|
||||
}
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
if (pileup != null) {
|
||||
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
|
||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()));
|
||||
b.PL(genotypeLikelihoods);
|
||||
b.DP(getFilteredDepth(pileup));
|
||||
genotypes.add(b.make());
|
||||
|
|
@ -150,10 +140,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
return builder.genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
public static HashMap<PileupElement, LinkedHashMap<Allele, Double>> getIndelLikelihoodMap() {
|
||||
return indelLikelihoodMap.get();
|
||||
}
|
||||
|
||||
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
|
||||
final ReferenceContext ref,
|
||||
final GenomeLoc loc,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,135 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
|
||||
//import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class PerReadAlleleLikelihoodMap {
|
||||
public static final double INDEL_LIKELIHOOD_THRESH = 0.1;
|
||||
|
||||
private List<Allele> alleles;
|
||||
private Map<GATKSAMRecord,Map<Allele,Double>> likelihoodReadMap;
|
||||
public PerReadAlleleLikelihoodMap() {
|
||||
likelihoodReadMap = new LinkedHashMap<GATKSAMRecord,Map<Allele,Double>>();
|
||||
alleles = new ArrayList<Allele>();
|
||||
}
|
||||
|
||||
public void add(GATKSAMRecord read, Allele a, Double likelihood) {
|
||||
Map<Allele,Double> likelihoodMap;
|
||||
if (likelihoodReadMap.containsKey(read)){
|
||||
// seen pileup element before
|
||||
likelihoodMap = likelihoodReadMap.get(read);
|
||||
}
|
||||
else {
|
||||
likelihoodMap = new HashMap<Allele, Double>();
|
||||
likelihoodReadMap.put(read,likelihoodMap);
|
||||
}
|
||||
likelihoodMap.put(a,likelihood);
|
||||
|
||||
if (!alleles.contains(a))
|
||||
alleles.add(a);
|
||||
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return likelihoodReadMap.size();
|
||||
}
|
||||
|
||||
public void add(PileupElement p, Allele a, Double likelihood) {
|
||||
add(p.getRead(),a,likelihood);
|
||||
}
|
||||
|
||||
public boolean containsPileupElement(PileupElement p) {
|
||||
return likelihoodReadMap.containsKey(p.getRead());
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return likelihoodReadMap.isEmpty();
|
||||
}
|
||||
|
||||
public Map<GATKSAMRecord,Map<Allele,Double>> getLikelihoodReadMap() {
|
||||
return likelihoodReadMap;
|
||||
}
|
||||
public void clear() {
|
||||
alleles.clear();
|
||||
likelihoodReadMap.clear();
|
||||
}
|
||||
|
||||
public Set<GATKSAMRecord> getStoredElements() {
|
||||
return likelihoodReadMap.keySet();
|
||||
}
|
||||
|
||||
public Collection<Map<Allele,Double>> getLikelihoodMapValues() {
|
||||
return likelihoodReadMap.values();
|
||||
}
|
||||
|
||||
public int getNumberOfStoredElements() {
|
||||
return likelihoodReadMap.size();
|
||||
}
|
||||
/**
|
||||
* Returns list of reads greedily associated with a particular allele.
|
||||
* Needs to loop for each read, and assign to each allele
|
||||
* @param a Desired allele
|
||||
* @return
|
||||
*/
|
||||
@Requires("a!=null")
|
||||
public List<GATKSAMRecord> getReadsAssociatedWithAllele(Allele a) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Map<Allele,Double> getLikelihoodsAssociatedWithPileupElement(PileupElement p) {
|
||||
if (!likelihoodReadMap.containsKey(p.getRead()))
|
||||
return null;
|
||||
|
||||
return likelihoodReadMap.get(p.getRead());
|
||||
}
|
||||
|
||||
public static Allele getMostLikelyAllele(Map<Allele,Double> alleleMap) {
|
||||
double minLike = Double.POSITIVE_INFINITY, maxLike = Double.NEGATIVE_INFINITY;
|
||||
Allele mostLikelyAllele = Allele.NO_CALL;
|
||||
|
||||
for (Map.Entry<Allele,Double> el : alleleMap.entrySet()) {
|
||||
if (el.getValue() > maxLike) {
|
||||
maxLike = el.getValue();
|
||||
mostLikelyAllele = el.getKey();
|
||||
}
|
||||
|
||||
if (el.getValue() < minLike)
|
||||
minLike = el.getValue();
|
||||
|
||||
}
|
||||
if (maxLike-minLike > INDEL_LIKELIHOOD_THRESH)
|
||||
return mostLikelyAllele;
|
||||
else
|
||||
return Allele.NO_CALL;
|
||||
}
|
||||
}
|
||||
|
|
@ -62,7 +62,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final List<Allele> allAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser) {
|
||||
final GenomeLocParser locParser,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
|
||||
perReadAlleleLikelihoodMap.clear(); // not used in SNP model, sanity check to delete any older data
|
||||
|
||||
final byte refBase = ref.getBase();
|
||||
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
|
||||
|
|
|
|||
|
|
@ -177,19 +177,23 @@ public class UnifiedGenotyperEngine {
|
|||
final List<VariantCallContext> results = new ArrayList<VariantCallContext>(2);
|
||||
|
||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
||||
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap = new HashMap<String,PerReadAlleleLikelihoodMap>();
|
||||
|
||||
if ( models.isEmpty() ) {
|
||||
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
|
||||
}
|
||||
else {
|
||||
for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
|
||||
perReadAlleleLikelihoodMap.clear();
|
||||
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||
if ( stratifiedContexts == null ) {
|
||||
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
|
||||
}
|
||||
else {
|
||||
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
||||
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
|
||||
if ( vc != null )
|
||||
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true));
|
||||
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -219,9 +223,13 @@ public class UnifiedGenotyperEngine {
|
|||
* @param tracker the meta data tracker
|
||||
* @param refContext the reference base
|
||||
* @param rawContext contextual information around the locus
|
||||
* @param perReadAlleleLikelihoodMap Map to store per-sample, per-read, per-allele likelihoods (only used for indels)
|
||||
* @return the VariantContext object
|
||||
*/
|
||||
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||
public VariantContext calculateLikelihoods(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext refContext,
|
||||
final AlignmentContext rawContext,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
||||
if ( models.isEmpty() ) {
|
||||
return null;
|
||||
|
|
@ -231,7 +239,7 @@ public class UnifiedGenotyperEngine {
|
|||
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||
// return the first valid one we encounter
|
||||
if ( stratifiedContexts != null )
|
||||
return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
||||
return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -247,7 +255,10 @@ public class UnifiedGenotyperEngine {
|
|||
* @param vc the GL-annotated variant context
|
||||
* @return the VariantCallContext object
|
||||
*/
|
||||
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) {
|
||||
public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext refContext,
|
||||
final AlignmentContext rawContext,
|
||||
final VariantContext vc) {
|
||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
||||
if ( models.isEmpty() ) {
|
||||
return null;
|
||||
|
|
@ -256,7 +267,7 @@ public class UnifiedGenotyperEngine {
|
|||
// return the first one
|
||||
final GenotypeLikelihoodsCalculationModel.Model model = models.get(0);
|
||||
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model);
|
||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -266,7 +277,7 @@ public class UnifiedGenotyperEngine {
|
|||
* @return the VariantCallContext object
|
||||
*/
|
||||
public VariantCallContext calculateGenotypes(VariantContext vc) {
|
||||
return calculateGenotypes(null, null, null, null, vc, GenotypeLikelihoodsCalculationModel.Model.valueOf("SNP"), false);
|
||||
return calculateGenotypes(null, null, null, null, vc, GenotypeLikelihoodsCalculationModel.Model.valueOf("SNP"), null);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -277,14 +288,21 @@ public class UnifiedGenotyperEngine {
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
// private method called by both UnifiedGenotyper and UGCalcLikelihoods entry points into the engine
|
||||
private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map<String, AlignmentContext> stratifiedContexts, AlignmentContextUtils.ReadOrientation type, List<Allele> alternateAllelesToUse, boolean useBAQedPileup, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
private VariantContext calculateLikelihoods(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext refContext,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final AlignmentContextUtils.ReadOrientation type,
|
||||
final List<Allele> alternateAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
|
||||
// initialize the data for this thread if that hasn't been done yet
|
||||
if ( glcm.get() == null ) {
|
||||
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
||||
}
|
||||
|
||||
return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
|
||||
return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||
|
|
@ -315,12 +333,22 @@ public class UnifiedGenotyperEngine {
|
|||
return new VariantCallContext(vc, false);
|
||||
}
|
||||
|
||||
public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
return calculateGenotypes(null, null, null, null, vc, model);
|
||||
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model, final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
return calculateGenotypes(null, null, null, null, vc, model, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false);
|
||||
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
return calculateGenotypes(null, null, null, null, vc, model, null);
|
||||
}
|
||||
|
||||
public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext refContext,
|
||||
final AlignmentContext rawContext,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false,perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -334,8 +362,11 @@ public class UnifiedGenotyperEngine {
|
|||
* @param inheritAttributesFromInputVC Output VC will contain attributes inherited from input vc
|
||||
* @return VC with assigned genotypes
|
||||
*/
|
||||
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final boolean inheritAttributesFromInputVC) {
|
||||
public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final boolean inheritAttributesFromInputVC,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
|
||||
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
||||
|
||||
|
|
@ -462,7 +493,7 @@ public class UnifiedGenotyperEngine {
|
|||
List<Allele> allAllelesToUse = builder.make().getAlleles();
|
||||
|
||||
// the forward lod
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model);
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
AFresult.reset();
|
||||
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
|
|
@ -471,7 +502,7 @@ public class UnifiedGenotyperEngine {
|
|||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model);
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
AFresult.reset();
|
||||
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
|
|
@ -507,7 +538,7 @@ public class UnifiedGenotyperEngine {
|
|||
final ReadBackedPileup pileup = rawContext.getBasePileup();
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
|
||||
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
|
||||
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.PairHMM;
|
||||
|
|
@ -40,6 +41,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
|
|||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class PairHMMIndelErrorModel {
|
||||
|
|
@ -167,11 +169,15 @@ public class PairHMMIndelErrorModel {
|
|||
}
|
||||
|
||||
|
||||
public synchronized double[] computeDiploidReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele, Haplotype> haplotypeMap, ReferenceContext ref, int eventLength, HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap){
|
||||
public synchronized double[] computeDiploidReadHaplotypeLikelihoods(final ReadBackedPileup pileup,
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||
final ReferenceContext ref,
|
||||
final int eventLength,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
|
||||
final int numHaplotypes = haplotypeMap.size();
|
||||
|
||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||
final double[][] readLikelihoods = computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, indelLikelihoodMap, readCounts);
|
||||
final double[][] readLikelihoods = computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap, readCounts);
|
||||
return getDiploidHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
|
||||
|
||||
}
|
||||
|
|
@ -181,7 +187,7 @@ public class PairHMMIndelErrorModel {
|
|||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||
final ReferenceContext ref,
|
||||
final int eventLength,
|
||||
final HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap,
|
||||
final int[] readCounts) {
|
||||
final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][haplotypeMap.size()];
|
||||
final PairHMM pairHMM = new PairHMM(bandedLikelihoods);
|
||||
|
|
@ -192,8 +198,8 @@ public class PairHMMIndelErrorModel {
|
|||
readCounts[readIdx] = p.getRepresentativeCount();
|
||||
|
||||
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
|
||||
if (indelLikelihoodMap.containsKey(p)) {
|
||||
HashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
||||
if (perReadAlleleLikelihoodMap.containsPileupElement(p)) {
|
||||
Map<Allele,Double> el = perReadAlleleLikelihoodMap.getLikelihoodsAssociatedWithPileupElement(p);
|
||||
int j=0;
|
||||
for (Allele a: haplotypeMap.keySet()) {
|
||||
readLikelihoods[readIdx][j++] = el.get(a);
|
||||
|
|
@ -201,7 +207,7 @@ public class PairHMMIndelErrorModel {
|
|||
}
|
||||
else {
|
||||
final int refWindowStart = ref.getWindow().getStart();
|
||||
final int refWindowStop = ref.getWindow().getStop();
|
||||
final int refWindowStop = ref.getWindow().getStop();
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString());
|
||||
|
|
@ -280,7 +286,7 @@ public class PairHMMIndelErrorModel {
|
|||
System.out.format("numStartSoftClippedBases: %d numEndSoftClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
|
||||
numStartSoftClippedBases, numEndSoftClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength());
|
||||
|
||||
LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();
|
||||
// LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();
|
||||
|
||||
/**
|
||||
* Check if we'll end up with an empty read once all clipping is done
|
||||
|
|
@ -288,7 +294,7 @@ public class PairHMMIndelErrorModel {
|
|||
if (numStartSoftClippedBases + numEndSoftClippedBases >= unclippedReadBases.length) {
|
||||
int j=0;
|
||||
for (Allele a: haplotypeMap.keySet()) {
|
||||
readEl.put(a,0.0);
|
||||
perReadAlleleLikelihoodMap.add(p,a,0.0);
|
||||
readLikelihoods[readIdx][j++] = 0.0;
|
||||
}
|
||||
}
|
||||
|
|
@ -329,45 +335,45 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
|
||||
|
||||
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
|
||||
(int)indStart, (int)indStop);
|
||||
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
|
||||
(int)indStart, (int)indStop);
|
||||
|
||||
final int X_METRIC_LENGTH = readBases.length+2;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length+2;
|
||||
final int X_METRIC_LENGTH = readBases.length+2;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length+2;
|
||||
|
||||
if (matchMetricArray == null) {
|
||||
//no need to reallocate arrays for each new haplotype, as length won't change
|
||||
matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
if (matchMetricArray == null) {
|
||||
//no need to reallocate arrays for each new haplotype, as length won't change
|
||||
matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
|
||||
|
||||
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
|
||||
}
|
||||
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
|
||||
}
|
||||
|
||||
int startIndexInHaplotype = 0;
|
||||
if (previousHaplotypeSeen != null)
|
||||
startIndexInHaplotype = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
|
||||
previousHaplotypeSeen = haplotypeBases.clone();
|
||||
int startIndexInHaplotype = 0;
|
||||
if (previousHaplotypeSeen != null)
|
||||
startIndexInHaplotype = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
|
||||
previousHaplotypeSeen = haplotypeBases.clone();
|
||||
|
||||
readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals,
|
||||
(read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities),
|
||||
(read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities),
|
||||
contextLogGapContinuationProbabilities,
|
||||
startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);
|
||||
readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals,
|
||||
(read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities),
|
||||
(read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities),
|
||||
contextLogGapContinuationProbabilities,
|
||||
startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);
|
||||
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println("H:"+new String(haplotypeBases));
|
||||
System.out.println("R:"+new String(readBases));
|
||||
System.out.format("L:%4.2f\n",readLikelihood);
|
||||
System.out.format("StPos:%d\n", startIndexInHaplotype);
|
||||
}
|
||||
readEl.put(a,readLikelihood);
|
||||
if (DEBUG) {
|
||||
System.out.println("H:"+new String(haplotypeBases));
|
||||
System.out.println("R:"+new String(readBases));
|
||||
System.out.format("L:%4.2f\n",readLikelihood);
|
||||
System.out.format("StPos:%d\n", startIndexInHaplotype);
|
||||
}
|
||||
|
||||
perReadAlleleLikelihoodMap.add(p, a, readLikelihood);
|
||||
readLikelihoods[readIdx][j++] = readLikelihood;
|
||||
}
|
||||
}
|
||||
indelLikelihoodMap.put(p,readEl);
|
||||
}
|
||||
readIdx++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -286,6 +286,7 @@ public class VariantDataManager {
|
|||
case INDEL:
|
||||
case MIXED:
|
||||
case SYMBOLIC:
|
||||
case STRUCTURAL_INDEL:
|
||||
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
|
||||
default:
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -265,7 +265,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
@Argument(fullName="restrictAllelesTo", shortName="restrictAllelesTo", doc="Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC", required=false)
|
||||
private NumberAlleleRestriction alleleRestriction = NumberAlleleRestriction.ALL;
|
||||
|
||||
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false)
|
||||
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Store the original AC, AF, and AN values in the INFO field after selecting (using keys AC_Orig, AF_Orig, and AN_Orig)", required=false)
|
||||
private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -444,29 +445,8 @@ public class BaseUtils {
|
|||
* @param bases the bases
|
||||
* @return the upper cased version
|
||||
*/
|
||||
static public byte[] convertToUpperCase(final byte[] bases) {
|
||||
for ( int i = 0; i < bases.length; i++ ) {
|
||||
if ( (char)bases[i] >= 'a' )
|
||||
bases[i] = toUpperCaseBase(bases[i]);
|
||||
}
|
||||
return bases;
|
||||
}
|
||||
|
||||
static public byte toUpperCaseBase(final byte base) {
|
||||
switch (base) {
|
||||
case 'a':
|
||||
return 'A';
|
||||
case 'c':
|
||||
return 'C';
|
||||
case 'g':
|
||||
return 'G';
|
||||
case 't':
|
||||
return 'T';
|
||||
case 'n':
|
||||
return 'N';
|
||||
default:
|
||||
return base;
|
||||
}
|
||||
static public void convertToUpperCase(final byte[] bases) {
|
||||
StringUtil.toUpperCase(bases);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -625,6 +625,10 @@ public class MathUtils {
|
|||
return maxElementIndex(array, array.length);
|
||||
}
|
||||
|
||||
public static int maxElementIndex(final byte[] array) {
|
||||
return maxElementIndex(array, array.length);
|
||||
}
|
||||
|
||||
public static int maxElementIndex(final int[] array, int endIndex) {
|
||||
if (array == null || array.length == 0)
|
||||
throw new IllegalArgumentException("Array cannot be null!");
|
||||
|
|
@ -638,6 +642,24 @@ public class MathUtils {
|
|||
return maxI;
|
||||
}
|
||||
|
||||
public static int maxElementIndex(final byte[] array, int endIndex) {
|
||||
if (array == null || array.length == 0)
|
||||
throw new IllegalArgumentException("Array cannot be null!");
|
||||
|
||||
int maxI = 0;
|
||||
for (int i = 1; i < endIndex; i++) {
|
||||
if (array[i] > array[maxI])
|
||||
maxI = i;
|
||||
}
|
||||
|
||||
return maxI;
|
||||
}
|
||||
|
||||
public static byte arrayMax(final byte[] array) {
|
||||
return array[maxElementIndex(array)];
|
||||
}
|
||||
|
||||
|
||||
public static double arrayMax(final double[] array) {
|
||||
return array[maxElementIndex(array)];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
|
|
@ -58,9 +59,7 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
}
|
||||
|
||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
return referenceReader.getSubsequenceAt( extendedLoc.getContig(),
|
||||
Math.max(1, extendedLoc.getStart() - padding),
|
||||
Math.min(referenceReader.getSequenceDictionary().getSequence(extendedLoc.getContig()).getSequenceLength(), extendedLoc.getStop() + padding) ).getBases();
|
||||
return getReference( referenceReader, padding, extendedLoc );
|
||||
}
|
||||
|
||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
|
|
@ -68,11 +67,16 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
}
|
||||
|
||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
return referenceReader.getSubsequenceAt( fullExtentReferenceLoc.getContig(),
|
||||
Math.max(1, fullExtentReferenceLoc.getStart() - padding),
|
||||
Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases();
|
||||
return getReference( referenceReader, padding, fullExtentReferenceLoc );
|
||||
}
|
||||
|
||||
private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
|
||||
final byte[] reference = referenceReader.getSubsequenceAt( genomeLoc.getContig(),
|
||||
Math.max(1, genomeLoc.getStart() - padding),
|
||||
Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases();
|
||||
StringUtil.toUpperCase(reference);
|
||||
return reference;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() { return activeRegionLoc; }
|
||||
|
|
|
|||
|
|
@ -0,0 +1,158 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.utils.threading;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.Priority;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.ThreadInfo;
|
||||
import java.lang.management.ThreadMXBean;
|
||||
import java.util.ArrayList;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Creates threads that automatically monitor their efficiency via the parent ThreadEfficiencyMonitor
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/14/12
|
||||
* Time: 8:47 AM
|
||||
*/
|
||||
@Invariant({
|
||||
"activeThreads.size() <= nThreadsToCreate",
|
||||
"countDownLatch.getCount() <= nThreadsToCreate",
|
||||
"nThreadsCreated <= nThreadsToCreate"
|
||||
})
|
||||
public class EfficiencyMonitoringThreadFactory extends ThreadEfficiencyMonitor implements ThreadFactory {
|
||||
final int nThreadsToCreate;
|
||||
final List<Thread> activeThreads;
|
||||
|
||||
int nThreadsCreated = 0;
|
||||
|
||||
/**
|
||||
* Counts down the number of active activeThreads whose runtime info hasn't been incorporated into
|
||||
* times. Counts down from nThreadsToCreate to 0, at which point any code waiting
|
||||
* on the final times is freed to run.
|
||||
*/
|
||||
final CountDownLatch countDownLatch;
|
||||
|
||||
/**
|
||||
* Create a new factory generating threads whose runtime and contention
|
||||
* behavior is tracked in this factory.
|
||||
*
|
||||
* @param nThreadsToCreate the number of threads we will create in the factory before it's considered complete
|
||||
*/
|
||||
public EfficiencyMonitoringThreadFactory(final int nThreadsToCreate) {
|
||||
super();
|
||||
if ( nThreadsToCreate <= 0 ) throw new IllegalArgumentException("nThreadsToCreate <= 0: " + nThreadsToCreate);
|
||||
|
||||
this.nThreadsToCreate = nThreadsToCreate;
|
||||
activeThreads = new ArrayList<Thread>(nThreadsToCreate);
|
||||
countDownLatch = new CountDownLatch(nThreadsToCreate);
|
||||
}
|
||||
|
||||
/**
|
||||
* How many threads have been created by this factory so far?
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int getNThreadsCreated() {
|
||||
return nThreadsCreated;
|
||||
}
|
||||
|
||||
/**
|
||||
* Only useful for testing, so that we can wait for all of the threads in the factory to complete running
|
||||
*
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
protected void waitForAllThreadsToComplete() throws InterruptedException {
|
||||
countDownLatch.await();
|
||||
}
|
||||
|
||||
@Ensures({
|
||||
"activeThreads.size() <= old(activeThreads.size())",
|
||||
"! activeThreads.contains(thread)",
|
||||
"countDownLatch.getCount() <= old(countDownLatch.getCount())"
|
||||
})
|
||||
@Override
|
||||
public synchronized void threadIsDone(final Thread thread) {
|
||||
nThreadsAnalyzed++;
|
||||
|
||||
if ( DEBUG ) logger.warn(" Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
|
||||
|
||||
super.threadIsDone(thread);
|
||||
|
||||
// remove the thread from the list of active activeThreads, if it's in there, and decrement the countdown latch
|
||||
if ( activeThreads.remove(thread) ) {
|
||||
// one less thread is live for those blocking on all activeThreads to be complete
|
||||
countDownLatch.countDown();
|
||||
if ( DEBUG ) logger.warn(" -> Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new thread from this factory
|
||||
*
|
||||
* @param runnable
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
@Ensures({
|
||||
"activeThreads.size() > old(activeThreads.size())",
|
||||
"activeThreads.contains(result)",
|
||||
"nThreadsCreated == old(nThreadsCreated) + 1"
|
||||
})
|
||||
public synchronized Thread newThread(final Runnable runnable) {
|
||||
if ( activeThreads.size() >= nThreadsToCreate)
|
||||
throw new IllegalStateException("Attempting to create more activeThreads than allowed by constructor argument nThreadsToCreate " + nThreadsToCreate);
|
||||
|
||||
nThreadsCreated++;
|
||||
final Thread myThread = new TrackingThread(runnable);
|
||||
activeThreads.add(myThread);
|
||||
return myThread;
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper around Thread that tracks the runtime of the thread and calls threadIsDone() when complete
|
||||
*/
|
||||
private class TrackingThread extends Thread {
|
||||
private TrackingThread(Runnable runnable) {
|
||||
super(runnable);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
super.run();
|
||||
threadIsDone(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,293 +0,0 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.utils.threading;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.Priority;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.ThreadInfo;
|
||||
import java.lang.management.ThreadMXBean;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
|
||||
/**
|
||||
* Create activeThreads, collecting statistics about their running state over time
|
||||
*
|
||||
* Uses a ThreadMXBean to capture info via ThreadInfo
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/14/12
|
||||
* Time: 8:47 AM
|
||||
*/
|
||||
@Invariant({
|
||||
"activeThreads.size() <= nThreadsToCreate",
|
||||
"countDownLatch.getCount() <= nThreadsToCreate",
|
||||
"nThreadsToCreated <= nThreadsToCreate"
|
||||
})
|
||||
public class StateMonitoringThreadFactory implements ThreadFactory {
|
||||
protected static final boolean DEBUG = false;
|
||||
private static Logger logger = Logger.getLogger(StateMonitoringThreadFactory.class);
|
||||
public static final List<Thread.State> TRACKED_STATES = Arrays.asList(Thread.State.BLOCKED, Thread.State.RUNNABLE, Thread.State.WAITING);
|
||||
|
||||
// todo -- it would be nice to not have to specify upfront the number of threads.
|
||||
// todo -- can we dynamically increment countDownLatch? It seems not...
|
||||
final int nThreadsToCreate;
|
||||
final List<Thread> activeThreads;
|
||||
final EnumMap<Thread.State, Long> times = new EnumMap<Thread.State, Long>(Thread.State.class);
|
||||
|
||||
int nThreadsToCreated = 0;
|
||||
|
||||
/**
|
||||
* The bean used to get the thread info about blocked and waiting times
|
||||
*/
|
||||
final ThreadMXBean bean;
|
||||
|
||||
/**
|
||||
* Counts down the number of active activeThreads whose runtime info hasn't been incorporated into
|
||||
* times. Counts down from nThreadsToCreate to 0, at which point any code waiting
|
||||
* on the final times is freed to run.
|
||||
*/
|
||||
final CountDownLatch countDownLatch;
|
||||
|
||||
/**
|
||||
* Instead of RUNNABLE we want to print running. This map goes from Thread.State names to human readable ones
|
||||
*/
|
||||
final static EnumMap<Thread.State, String> PRETTY_NAMES = new EnumMap<Thread.State, String>(Thread.State.class);
|
||||
static {
|
||||
PRETTY_NAMES.put(Thread.State.RUNNABLE, "running");
|
||||
PRETTY_NAMES.put(Thread.State.BLOCKED, "blocked");
|
||||
PRETTY_NAMES.put(Thread.State.WAITING, "waiting");
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new factory generating threads whose runtime and contention
|
||||
* behavior is tracked in this factory.
|
||||
*
|
||||
* @param nThreadsToCreate the number of threads we will create in the factory before it's considered complete
|
||||
* // TODO -- remove argument when we figure out how to implement this capability
|
||||
*/
|
||||
public StateMonitoringThreadFactory(final int nThreadsToCreate) {
|
||||
if ( nThreadsToCreate <= 0 ) throw new IllegalArgumentException("nThreadsToCreate <= 0: " + nThreadsToCreate);
|
||||
|
||||
this.nThreadsToCreate = nThreadsToCreate;
|
||||
activeThreads = new ArrayList<Thread>(nThreadsToCreate);
|
||||
|
||||
// initialize times to 0
|
||||
for ( final Thread.State state : Thread.State.values() )
|
||||
times.put(state, 0l);
|
||||
|
||||
// get the bean, and start tracking
|
||||
bean = ManagementFactory.getThreadMXBean();
|
||||
if ( bean.isThreadContentionMonitoringSupported() )
|
||||
bean.setThreadContentionMonitoringEnabled(true);
|
||||
else
|
||||
logger.warn("Thread contention monitoring not supported, we cannot track GATK multi-threaded efficiency");
|
||||
//bean.setThreadCpuTimeEnabled(true);
|
||||
|
||||
countDownLatch = new CountDownLatch(nThreadsToCreate);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the time spent in state across all threads created by this factory
|
||||
*
|
||||
* @param state on of the TRACKED_STATES
|
||||
* @return the time in milliseconds
|
||||
*/
|
||||
@Ensures({"result >= 0", "TRACKED_STATES.contains(state)"})
|
||||
public synchronized long getStateTime(final Thread.State state) {
|
||||
return times.get(state);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the total time spent in all states across all threads created by this factory
|
||||
*
|
||||
* @return the time in milliseconds
|
||||
*/
|
||||
@Ensures({"result >= 0"})
|
||||
public synchronized long getTotalTime() {
|
||||
long total = 0;
|
||||
for ( final long time : times.values() )
|
||||
total += time;
|
||||
return total;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the fraction of time spent in state across all threads created by this factory
|
||||
*
|
||||
* @return the fraction (0.0-1.0) of time spent in state over all state times of all threads
|
||||
*/
|
||||
@Ensures({"result >= 0.0", "result <= 1.0", "TRACKED_STATES.contains(state)"})
|
||||
public synchronized double getStateFraction(final Thread.State state) {
|
||||
return getStateTime(state) / (1.0 * Math.max(getTotalTime(), 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* How many threads have been created by this factory so far?
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int getNThreadsCreated() {
|
||||
return nThreadsToCreated;
|
||||
}
|
||||
|
||||
public void waitForAllThreadsToComplete() throws InterruptedException {
|
||||
countDownLatch.await();
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String toString() {
|
||||
final StringBuilder b = new StringBuilder();
|
||||
|
||||
b.append("total ").append(getTotalTime()).append(" ");
|
||||
for ( final Thread.State state : TRACKED_STATES ) {
|
||||
b.append(state).append(" ").append(getStateTime(state)).append(" ");
|
||||
}
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Print usage information about threads from this factory to logger
|
||||
* with the INFO priority
|
||||
*
|
||||
* @param logger
|
||||
*/
|
||||
public synchronized void printUsageInformation(final Logger logger) {
|
||||
printUsageInformation(logger, Priority.INFO);
|
||||
}
|
||||
|
||||
/**
|
||||
* Print usage information about threads from this factory to logger
|
||||
* with the provided priority
|
||||
*
|
||||
* @param logger
|
||||
*/
|
||||
public synchronized void printUsageInformation(final Logger logger, final Priority priority) {
|
||||
logger.log(priority, "Number of activeThreads used: " + getNThreadsCreated());
|
||||
logger.log(priority, "Total runtime " + new AutoFormattingTime(getTotalTime() / 1000.0));
|
||||
for ( final Thread.State state : TRACKED_STATES ) {
|
||||
logger.log(priority, String.format(" Fraction of time spent %s is %.2f (%s)",
|
||||
prettyName(state), getStateFraction(state), new AutoFormattingTime(getStateTime(state) / 1000.0)));
|
||||
}
|
||||
logger.log(priority, String.format("Efficiency of multi-threading: %.2f%% of time spent doing productive work",
|
||||
getStateFraction(Thread.State.RUNNABLE) * 100));
|
||||
}
|
||||
|
||||
private String prettyName(final Thread.State state) {
|
||||
return PRETTY_NAMES.get(state);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new thread from this factory
|
||||
*
|
||||
* @param runnable
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
@Ensures({
|
||||
"activeThreads.size() > old(activeThreads.size())",
|
||||
"activeThreads.contains(result)",
|
||||
"nThreadsToCreated == old(nThreadsToCreated) + 1"
|
||||
})
|
||||
public synchronized Thread newThread(final Runnable runnable) {
|
||||
if ( activeThreads.size() >= nThreadsToCreate)
|
||||
throw new IllegalStateException("Attempting to create more activeThreads than allowed by constructor argument nThreadsToCreate " + nThreadsToCreate);
|
||||
|
||||
nThreadsToCreated++;
|
||||
final Thread myThread = new TrackingThread(runnable);
|
||||
activeThreads.add(myThread);
|
||||
return myThread;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the information about completed thread that ran for runtime in milliseconds
|
||||
*
|
||||
* This method updates all of the key timing and tracking information in the factory so that
|
||||
* thread can be retired. After this call the factory shouldn't have a pointer to the thread any longer
|
||||
*
|
||||
* @param thread
|
||||
* @param runtimeInMilliseconds
|
||||
*/
|
||||
@Ensures({
|
||||
"activeThreads.size() < old(activeThreads.size())",
|
||||
"! activeThreads.contains(thread)",
|
||||
"getTotalTime() >= old(getTotalTime())",
|
||||
"countDownLatch.getCount() < old(countDownLatch.getCount())"
|
||||
})
|
||||
private synchronized void threadIsDone(final Thread thread, final long runtimeInMilliseconds) {
|
||||
if ( DEBUG ) logger.warn(" Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
|
||||
if ( DEBUG ) logger.warn("UpdateThreadInfo called");
|
||||
|
||||
final ThreadInfo info = bean.getThreadInfo(thread.getId());
|
||||
if ( info != null ) {
|
||||
if ( DEBUG ) logger.warn("Updating thread total runtime " + runtimeInMilliseconds + " of which blocked " + info.getBlockedTime() + " and waiting " + info.getWaitedTime());
|
||||
incTimes(Thread.State.BLOCKED, info.getBlockedTime());
|
||||
incTimes(Thread.State.WAITING, info.getWaitedTime());
|
||||
incTimes(Thread.State.RUNNABLE, runtimeInMilliseconds - info.getWaitedTime() - info.getBlockedTime());
|
||||
}
|
||||
|
||||
// remove the thread from the list of active activeThreads
|
||||
if ( ! activeThreads.remove(thread) )
|
||||
throw new IllegalStateException("Thread " + thread + " not in list of active activeThreads");
|
||||
|
||||
// one less thread is live for those blocking on all activeThreads to be complete
|
||||
countDownLatch.countDown();
|
||||
if ( DEBUG ) logger.warn(" -> Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function that increments the times counter by by for state
|
||||
*
|
||||
* @param state
|
||||
* @param by
|
||||
*/
|
||||
private synchronized void incTimes(final Thread.State state, final long by) {
|
||||
times.put(state, times.get(state) + by);
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper around Thread that tracks the runtime of the thread and calls threadIsDone() when complete
|
||||
*/
|
||||
private class TrackingThread extends Thread {
|
||||
private TrackingThread(Runnable runnable) {
|
||||
super(runnable);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
final long startTime = System.currentTimeMillis();
|
||||
super.run();
|
||||
final long endTime = System.currentTimeMillis();
|
||||
threadIsDone(this, endTime - startTime);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,206 @@
|
|||
package org.broadinstitute.sting.utils.threading;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.Priority;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.ThreadInfo;
|
||||
import java.lang.management.ThreadMXBean;
|
||||
import java.util.EnumMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Uses an MXBean to monitor thread efficiency
|
||||
*
|
||||
* Once the monitor is created, calls to threadIsDone() can be used to add information
|
||||
* about the efficiency of the provided thread to this monitor.
|
||||
*
|
||||
* Provides simple print() for displaying efficiency information to a logger
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/22/12
|
||||
* Time: 10:48 AM
|
||||
*/
|
||||
@Invariant({"nThreadsAnalyzed >= 0"})
|
||||
public class ThreadEfficiencyMonitor {
|
||||
protected static final boolean DEBUG = false;
|
||||
protected static Logger logger = Logger.getLogger(EfficiencyMonitoringThreadFactory.class);
|
||||
final EnumMap<State, Long> times = new EnumMap<State, Long>(State.class);
|
||||
|
||||
/**
|
||||
* The number of threads we've included in our efficiency monitoring
|
||||
*/
|
||||
int nThreadsAnalyzed = 0;
|
||||
|
||||
/**
|
||||
* The bean used to get the thread info about blocked and waiting times
|
||||
*/
|
||||
final ThreadMXBean bean;
|
||||
|
||||
public ThreadEfficiencyMonitor() {
|
||||
bean = ManagementFactory.getThreadMXBean();
|
||||
|
||||
// get the bean, and start tracking
|
||||
if ( bean.isThreadContentionMonitoringSupported() )
|
||||
bean.setThreadContentionMonitoringEnabled(true);
|
||||
else
|
||||
logger.warn("Thread contention monitoring not supported, we cannot track GATK multi-threaded efficiency");
|
||||
//bean.setThreadCpuTimeEnabled(true);
|
||||
|
||||
if ( bean.isThreadCpuTimeSupported() )
|
||||
bean.setThreadCpuTimeEnabled(true);
|
||||
else
|
||||
logger.warn("Thread CPU monitoring not supported, we cannot track GATK multi-threaded efficiency");
|
||||
|
||||
// initialize times to 0
|
||||
for ( final State state : State.values() )
|
||||
times.put(state, 0l);
|
||||
}
|
||||
|
||||
private static long nanoToMilli(final long timeInNano) {
|
||||
return TimeUnit.NANOSECONDS.toMillis(timeInNano);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the time spent in state across all threads created by this factory
|
||||
*
|
||||
* @param state to get information about
|
||||
* @return the time in milliseconds
|
||||
*/
|
||||
@Ensures({"result >= 0"})
|
||||
public synchronized long getStateTime(final State state) {
|
||||
return times.get(state);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the total time spent in all states across all threads created by this factory
|
||||
*
|
||||
* @return the time in milliseconds
|
||||
*/
|
||||
@Ensures({"result >= 0"})
|
||||
public synchronized long getTotalTime() {
|
||||
long total = 0;
|
||||
for ( final long time : times.values() )
|
||||
total += time;
|
||||
return total;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the fraction of time spent in state across all threads created by this factory
|
||||
*
|
||||
* @return the percentage (0.0-100.0) of time spent in state over all state times of all threads
|
||||
*/
|
||||
@Ensures({"result >= 0.0", "result <= 100.0"})
|
||||
public synchronized double getStatePercent(final State state) {
|
||||
return (100.0 * getStateTime(state)) / Math.max(getTotalTime(), 1);
|
||||
}
|
||||
|
||||
public int getnThreadsAnalyzed() {
|
||||
return nThreadsAnalyzed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String toString() {
|
||||
final StringBuilder b = new StringBuilder();
|
||||
|
||||
b.append("total ").append(getTotalTime()).append(" ");
|
||||
for ( final State state : State.values() ) {
|
||||
b.append(state).append(" ").append(getStateTime(state)).append(" ");
|
||||
}
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Print usage information about threads from this factory to logger
|
||||
* with the INFO priority
|
||||
*
|
||||
* @param logger
|
||||
*/
|
||||
public synchronized void printUsageInformation(final Logger logger) {
|
||||
printUsageInformation(logger, Priority.INFO);
|
||||
}
|
||||
|
||||
/**
|
||||
* Print usage information about threads from this factory to logger
|
||||
* with the provided priority
|
||||
*
|
||||
* @param logger
|
||||
*/
|
||||
public synchronized void printUsageInformation(final Logger logger, final Priority priority) {
|
||||
logger.debug("Number of threads monitored: " + getnThreadsAnalyzed());
|
||||
logger.debug("Total runtime " + new AutoFormattingTime(TimeUnit.MILLISECONDS.toSeconds(getTotalTime())));
|
||||
for ( final State state : State.values() ) {
|
||||
logger.debug(String.format("\tPercent of time spent %s is %.2f", state.getUserFriendlyName(), getStatePercent(state)));
|
||||
}
|
||||
logger.log(priority, String.format("CPU efficiency : %6.2f%% of time spent %s", getStatePercent(State.USER_CPU), State.USER_CPU.getUserFriendlyName()));
|
||||
logger.log(priority, String.format("Walker inefficiency : %6.2f%% of time spent %s", getStatePercent(State.BLOCKING), State.BLOCKING.getUserFriendlyName()));
|
||||
logger.log(priority, String.format("I/O inefficiency : %6.2f%% of time spent %s", getStatePercent(State.WAITING_FOR_IO), State.WAITING_FOR_IO.getUserFriendlyName()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the information about completed thread that ran for runtime in milliseconds
|
||||
*
|
||||
* This method updates all of the key timing and tracking information in the factory so that
|
||||
* thread can be retired. After this call the factory shouldn't have a pointer to the thread any longer
|
||||
*
|
||||
* @param thread the thread whose information we are updating
|
||||
*/
|
||||
@Ensures({
|
||||
"getTotalTime() >= old(getTotalTime())"
|
||||
})
|
||||
public synchronized void threadIsDone(final Thread thread) {
|
||||
nThreadsAnalyzed++;
|
||||
|
||||
if ( DEBUG ) logger.warn("UpdateThreadInfo called");
|
||||
|
||||
final long threadID = thread.getId();
|
||||
final ThreadInfo info = bean.getThreadInfo(thread.getId());
|
||||
final long totalTimeNano = bean.getThreadCpuTime(threadID);
|
||||
final long userTimeNano = bean.getThreadUserTime(threadID);
|
||||
final long systemTimeNano = totalTimeNano - userTimeNano;
|
||||
final long userTimeInMilliseconds = nanoToMilli(userTimeNano);
|
||||
final long systemTimeInMilliseconds = nanoToMilli(systemTimeNano);
|
||||
|
||||
if ( info != null ) {
|
||||
if ( DEBUG ) logger.warn("Updating thread with user runtime " + userTimeInMilliseconds + " and system runtime " + systemTimeInMilliseconds + " of which blocked " + info.getBlockedTime() + " and waiting " + info.getWaitedTime());
|
||||
incTimes(State.BLOCKING, info.getBlockedTime());
|
||||
incTimes(State.WAITING, info.getWaitedTime());
|
||||
incTimes(State.USER_CPU, userTimeInMilliseconds);
|
||||
incTimes(State.WAITING_FOR_IO, systemTimeInMilliseconds);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function that increments the times counter by by for state
|
||||
*
|
||||
* @param state
|
||||
* @param by
|
||||
*/
|
||||
@Requires({"state != null", "by >= 0"})
|
||||
@Ensures("getTotalTime() == old(getTotalTime()) + by")
|
||||
private synchronized void incTimes(final State state, final long by) {
|
||||
times.put(state, times.get(state) + by);
|
||||
}
|
||||
|
||||
public enum State {
|
||||
BLOCKING("blocking on synchronized data structures"),
|
||||
WAITING("waiting on some other thread"),
|
||||
USER_CPU("doing productive CPU work"),
|
||||
WAITING_FOR_IO("waiting for I/O");
|
||||
|
||||
private final String userFriendlyName;
|
||||
|
||||
private State(String userFriendlyName) {
|
||||
this.userFriendlyName = userFriendlyName;
|
||||
}
|
||||
|
||||
public String getUserFriendlyName() {
|
||||
return userFriendlyName;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -105,7 +105,7 @@ public class Allele implements Comparable<Allele> {
|
|||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele");
|
||||
}
|
||||
else {
|
||||
bases = BaseUtils.convertToUpperCase(bases);
|
||||
BaseUtils.convertToUpperCase(bases);
|
||||
}
|
||||
|
||||
this.isRef = isRef;
|
||||
|
|
|
|||
|
|
@ -94,6 +94,7 @@ public class VariantContextBuilder {
|
|||
this.start = start;
|
||||
this.stop = stop;
|
||||
this.alleles = alleles;
|
||||
this.attributes = Collections.emptyMap(); // immutable
|
||||
toValidate.add(VariantContext.Validation.ALLELES);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -208,19 +208,18 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
/**
|
||||
* Test to make sure that reads supporting only an indel (example cigar string: 76I) do
|
||||
* not negatively influence the ordering of the pileup.
|
||||
* Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly
|
||||
*/
|
||||
@Test
|
||||
public void testWholeIndelReadTest2() {
|
||||
public void testWholeIndelReadRepresentedTest() {
|
||||
final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
||||
|
||||
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header,"read",0,secondLocus,1);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', 1));
|
||||
read.setBaseQualities(Utils.dupBytes((byte) '@', 1));
|
||||
read.setCigarString("1I");
|
||||
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1);
|
||||
read1.setReadBases(Utils.dupBytes((byte) 'A', 1));
|
||||
read1.setBaseQualities(Utils.dupBytes((byte) '@', 1));
|
||||
read1.setCigarString("1I");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(read);
|
||||
List<SAMRecord> reads = Arrays.asList(read1);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads, createTestReadProperties());
|
||||
|
|
@ -234,6 +233,26 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
Assert.assertFalse(pe.isAfterInsertion());
|
||||
Assert.assertEquals(pe.getEventBases(), "A");
|
||||
}
|
||||
|
||||
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10);
|
||||
read2.setReadBases(Utils.dupBytes((byte) 'A', 10));
|
||||
read2.setBaseQualities(Utils.dupBytes((byte) '@', 10));
|
||||
read2.setCigarString("10I");
|
||||
|
||||
reads = Arrays.asList(read2);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads, createTestReadProperties());
|
||||
|
||||
while(li.hasNext()) {
|
||||
AlignmentContext alignmentContext = li.next();
|
||||
ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
Assert.assertTrue(p.getNumberOfElements() == 1);
|
||||
PileupElement pe = p.iterator().next();
|
||||
Assert.assertTrue(pe.isBeforeInsertion());
|
||||
Assert.assertFalse(pe.isAfterInsertion());
|
||||
Assert.assertEquals(pe.getEventBases(), "AAAAAAAAAA");
|
||||
}
|
||||
}
|
||||
|
||||
private static ReadProperties createTestReadProperties() {
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("95b0627bfcac2191aed9908904e892ff"));
|
||||
Arrays.asList("4a0318d0452d2dccde48ef081c431bf8"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("0e2509349fd6c8a9e9408c918215e1de"));
|
||||
Arrays.asList("da19c8e3c58340ba8bcc88e95ece4ac1"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("32d81a7797605afb526983a2ab45efc2"));
|
||||
Arrays.asList("cdefe79f46482a3d050ca2132604663a"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("350539ccecea0d1f7fffd4ac29c015e7"));
|
||||
Arrays.asList("5ec4c07b6801fca7013e3b0beb8b5418"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testOverwritingHeader() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
||||
Arrays.asList("c222361819fae035a0162f876990fdee"));
|
||||
Arrays.asList("28c07151f5c5fae87c691d8f7d1a3929"));
|
||||
executeTest("test overwriting header", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("0039fd0464c87e6ce66c4c8670fd8dfa"));
|
||||
Arrays.asList("9a7fa3e9ec8350e3e9cfdce0c00ddcc3"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +36,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("d1e68d4db6585ec00213b1d2d05e01a9"));
|
||||
Arrays.asList("78693f3bf5d588e250507a596aa400da"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -44,7 +44,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("b53860d209f8440f12b78d01606553e1"));
|
||||
Arrays.asList("babf24ec8e5b5708d4a049629f7ea073"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -52,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("61007c22c00a2871237280914a8f88f0"));
|
||||
Arrays.asList("754187e70c1d117087e2270950a1c230"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("feda4a38bba096f7b740a146055509c2"));
|
||||
Arrays.asList("f9a2f882d050a90e6d8e6a1fba00f858"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("0ff525e65c5836289c454c76ead5d80e"));
|
||||
Arrays.asList("8a4ad38ec8015eea3461295148143428"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "e1a17f8f852c3d639f26e659d37bc1e5";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "ebb42960e115fb8dacd3edff5541b4da";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -139,7 +139,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinBaseQualityScore() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
|
||||
Arrays.asList("b0b92abbaaa4c787dce6f1b302f983ee"));
|
||||
Arrays.asList("91f7e112200ed2c3b0a5d0d9e16e9369"));
|
||||
executeTest("test min_base_quality_score 26", spec);
|
||||
}
|
||||
|
||||
|
|
@ -147,7 +147,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSLOD() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("186d33429756c89aad6cd89424d6dc94"));
|
||||
Arrays.asList("b86e52b18496ab43a6b9a1bda632b5e6"));
|
||||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
|
|
@ -155,7 +155,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNDA() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("11b87f68b8530da168c1418513115f30"));
|
||||
Arrays.asList("79b3e4f8b4476ce3c3acbc271d6ddcdc"));
|
||||
executeTest("test NDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -163,23 +163,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("d2be4b1af1f29579c4f96c08e1ddd871"));
|
||||
Arrays.asList("bf7f21a600956eda0a357b97b21e3069"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterSitesOnly() {
|
||||
testOutputParameters("-sites_only", "0055bd060e6ef53a6b836903d68953c9");
|
||||
testOutputParameters("-sites_only", "976109543d8d97d94e0fe0521ff326e8");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllConfident() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "235bec0a7b2d901442261104db18f5eb");
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "bec7bcc50b42782e20a970db11201399");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllSites() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "7c57ede7019063c19aa9d2136045d84f");
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "09494afd12cef97293ed35d1a972f623");
|
||||
}
|
||||
|
||||
private void testOutputParameters(final String args, final String md5) {
|
||||
|
|
@ -193,7 +193,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("3f8d724a5158adac4df38c4e2ed04167"));
|
||||
Arrays.asList("e94be02fc5484c20b512840884e3d463"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -201,7 +201,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("3f8d724a5158adac4df38c4e2ed04167"));
|
||||
Arrays.asList("e94be02fc5484c20b512840884e3d463"));
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -212,12 +212,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "7e7384a3a52e19f76f368c2f4561d510" );
|
||||
testHeterozosity( 0.01, "0dca2699f709793026b853c6f339bf08" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "3d16366d870c086e894c07c9da411795" );
|
||||
testHeterozosity( 1.0 / 1850, "35f14e436927e64712a8e28080e90c91" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -241,7 +241,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("58abc4f504d3afd42271e290ac846c4b"));
|
||||
Arrays.asList("0360b79163aa28ae66d0dde4c26b3d76"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -260,7 +260,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("e247f579f01eb698cfa1ae1e8a3995a8"));
|
||||
Arrays.asList("59892388916bdfa544750ab76e43eabb"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -279,7 +279,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("cc2167dce156f70f5a31ac3dce499266"));
|
||||
Arrays.asList("6aa034f669ec09ac4f5a28624cbe1830"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("1268bde77842e6bb6a4f337c1d589f4d"));
|
||||
Arrays.asList("ba7a011d0c665acc4455d58a6ab28716"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -307,7 +307,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("10c86ff98ad5ab800d208b435bcfbd7d"));
|
||||
Arrays.asList("4f7d80f4f53ef0f0959414cb30097482"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("c0c4dbb050296633a3150b104b77e05a"));
|
||||
Arrays.asList("95986d0c92436d3b9c1f1be9c768a368"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("2472722f87f8718861698f60bbba2462"));
|
||||
Arrays.asList("cecd3e35a817e299e97e8f7bbf083d2c"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -335,13 +335,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSampleIndels1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("eeb64b261f0a44aa478d753dbbf9378e"));
|
||||
Arrays.asList("c3f786a5228346b43a80aa80d22b1490"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("d0a66c234056bb83dd84113bc2421f1e"));
|
||||
Arrays.asList("1a4d856bfe53d9acee0ea303c4b83bb1"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -351,7 +351,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
|
||||
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
|
||||
Arrays.asList("db0f91abb901e097714d8755058e1319"));
|
||||
Arrays.asList("d76eacc4021b78ccc0a9026162e814a7"));
|
||||
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -363,7 +363,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 20:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("b3c923ed9efa04b85fc18a9b45c8d2a6"));
|
||||
Arrays.asList("59ff26d7e5ca2503ebe9f74902251551"));
|
||||
|
||||
executeTest(String.format("test UG with base indel quality scores"), spec);
|
||||
}
|
||||
|
|
@ -397,7 +397,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("160600dfa8e46f91dbb5d574517aac74"));
|
||||
Arrays.asList("f99f9a917529bfef717fad97f725d5df"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -405,7 +405,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("aa58dc9f77132c30363562bcdc321f6e"));
|
||||
Arrays.asList("eac2cd649bd5836068350eb4260aaea7"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -39,32 +39,32 @@ import java.util.concurrent.*;
|
|||
/**
|
||||
* Tests for the state monitoring thread factory.
|
||||
*/
|
||||
public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
|
||||
public class EfficiencyMonitoringThreadFactoryUnitTest extends BaseTest {
|
||||
// the duration of the tests -- 100 ms is tolerable given the number of tests we are doing
|
||||
private final static long THREAD_TARGET_DURATION_IN_MILLISECOND = 100;
|
||||
private final static long THREAD_TARGET_DURATION_IN_MILLISECOND = 1000;
|
||||
final static Object GLOBAL_LOCK = new Object();
|
||||
|
||||
private class StateTest extends TestDataProvider {
|
||||
private final double TOLERANCE = 0.1; // willing to tolerate a 10% error
|
||||
|
||||
final List<Thread.State> statesForThreads;
|
||||
final List<EfficiencyMonitoringThreadFactory.State> statesForThreads;
|
||||
|
||||
public StateTest(final List<Thread.State> statesForThreads) {
|
||||
public StateTest(final List<EfficiencyMonitoringThreadFactory.State> statesForThreads) {
|
||||
super(StateTest.class);
|
||||
this.statesForThreads = statesForThreads;
|
||||
setName("StateTest " + Utils.join(",", statesForThreads));
|
||||
}
|
||||
|
||||
public List<Thread.State> getStatesForThreads() {
|
||||
public List<EfficiencyMonitoringThreadFactory.State> getStatesForThreads() {
|
||||
return statesForThreads;
|
||||
}
|
||||
|
||||
public int getNStates() { return statesForThreads.size(); }
|
||||
|
||||
public double maxStateFraction(final Thread.State state) { return fraction(state) + TOLERANCE; }
|
||||
public double minStateFraction(final Thread.State state) { return fraction(state) - TOLERANCE; }
|
||||
public double maxStatePercent(final EfficiencyMonitoringThreadFactory.State state) { return 100*(fraction(state) + TOLERANCE); }
|
||||
public double minStatePercent(final EfficiencyMonitoringThreadFactory.State state) { return 100*(fraction(state) - TOLERANCE); }
|
||||
|
||||
private double fraction(final Thread.State state) {
|
||||
private double fraction(final EfficiencyMonitoringThreadFactory.State state) {
|
||||
return Collections.frequency(statesForThreads, state) / (1.0 * statesForThreads.size());
|
||||
}
|
||||
}
|
||||
|
|
@ -74,18 +74,16 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
|
|||
* requested for input argument
|
||||
*/
|
||||
private static class StateTestThread implements Callable<Double> {
|
||||
private final Thread.State stateToImplement;
|
||||
private final EfficiencyMonitoringThreadFactory.State stateToImplement;
|
||||
|
||||
private StateTestThread(final Thread.State stateToImplement) {
|
||||
if ( ! StateMonitoringThreadFactory.TRACKED_STATES.contains(stateToImplement) )
|
||||
throw new IllegalArgumentException("Unexpected state " + stateToImplement);
|
||||
private StateTestThread(final EfficiencyMonitoringThreadFactory.State stateToImplement) {
|
||||
this.stateToImplement = stateToImplement;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Double call() throws Exception {
|
||||
switch ( stateToImplement ) {
|
||||
case RUNNABLE:
|
||||
case USER_CPU:
|
||||
// do some work until we get to THREAD_TARGET_DURATION_IN_MILLISECOND
|
||||
double sum = 0.0;
|
||||
final long startTime = System.currentTimeMillis();
|
||||
|
|
@ -96,13 +94,17 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
|
|||
case WAITING:
|
||||
Thread.currentThread().sleep(THREAD_TARGET_DURATION_IN_MILLISECOND);
|
||||
return 0.0;
|
||||
case BLOCKED:
|
||||
if ( StateMonitoringThreadFactory.DEBUG ) logger.warn("Blocking...");
|
||||
case BLOCKING:
|
||||
if ( EfficiencyMonitoringThreadFactory.DEBUG ) logger.warn("Blocking...");
|
||||
synchronized (GLOBAL_LOCK) {
|
||||
// the GLOBAL_LOCK must be held by the unit test itself for this to properly block
|
||||
if ( StateMonitoringThreadFactory.DEBUG ) logger.warn(" ... done blocking");
|
||||
if ( EfficiencyMonitoringThreadFactory.DEBUG ) logger.warn(" ... done blocking");
|
||||
}
|
||||
return 0.0;
|
||||
case WAITING_FOR_IO:
|
||||
// TODO -- implement me
|
||||
// shouldn't ever get here, throw an exception
|
||||
throw new ReviewedStingException("WAITING_FOR_IO testing currently not implemented, until we figure out how to force a system call block");
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected thread test state " + stateToImplement);
|
||||
}
|
||||
|
|
@ -111,8 +113,11 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "StateTest")
|
||||
public Object[][] createStateTest() {
|
||||
for ( final int nThreads : Arrays.asList(1, 2, 3, 4) ) {
|
||||
for (final List<Thread.State> states : Utils.makePermutations(StateMonitoringThreadFactory.TRACKED_STATES, nThreads, true) ) {
|
||||
for ( final int nThreads : Arrays.asList(3) ) {
|
||||
//final List<EfficiencyMonitoringThreadFactory.State> allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.WAITING_FOR_IO);
|
||||
final List<EfficiencyMonitoringThreadFactory.State> allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.USER_CPU, EfficiencyMonitoringThreadFactory.State.WAITING, EfficiencyMonitoringThreadFactory.State.BLOCKING);
|
||||
//final List<EfficiencyMonitoringThreadFactory.State> allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.values());
|
||||
for (final List<EfficiencyMonitoringThreadFactory.State> states : Utils.makePermutations(allStates, nThreads, true) ) {
|
||||
//if ( Collections.frequency(states, Thread.State.BLOCKED) > 0)
|
||||
new StateTest(states);
|
||||
}
|
||||
|
|
@ -121,16 +126,16 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
|
|||
return StateTest.getTests(StateTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = false, dataProvider = "StateTest")
|
||||
@Test(enabled = true, dataProvider = "StateTest")
|
||||
public void testStateTest(final StateTest test) throws InterruptedException {
|
||||
// allows us to test blocking
|
||||
final StateMonitoringThreadFactory factory = new StateMonitoringThreadFactory(test.getNStates());
|
||||
final EfficiencyMonitoringThreadFactory factory = new EfficiencyMonitoringThreadFactory(test.getNStates());
|
||||
final ExecutorService threadPool = Executors.newFixedThreadPool(test.getNStates(), factory);
|
||||
|
||||
logger.warn("Running " + test);
|
||||
synchronized (GLOBAL_LOCK) {
|
||||
//logger.warn(" Have lock");
|
||||
for ( final Thread.State threadToRunState : test.getStatesForThreads() )
|
||||
for ( final EfficiencyMonitoringThreadFactory.State threadToRunState : test.getStatesForThreads() )
|
||||
threadPool.submit(new StateTestThread(threadToRunState));
|
||||
|
||||
// lock has to be here for the whole running of the activeThreads but end before the sleep so the blocked activeThreads
|
||||
|
|
@ -153,10 +158,10 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
|
|||
Assert.assertTrue(totalTime >= minTime, "Factory results not properly accumulated: totalTime = " + totalTime + " < minTime = " + minTime);
|
||||
Assert.assertTrue(totalTime <= maxTime, "Factory results not properly accumulated: totalTime = " + totalTime + " > maxTime = " + maxTime);
|
||||
|
||||
for (final Thread.State state : StateMonitoringThreadFactory.TRACKED_STATES ) {
|
||||
final double min = test.minStateFraction(state);
|
||||
final double max = test.maxStateFraction(state);
|
||||
final double obs = factory.getStateFraction(state);
|
||||
for (final EfficiencyMonitoringThreadFactory.State state : EfficiencyMonitoringThreadFactory.State.values() ) {
|
||||
final double min = test.minStatePercent(state);
|
||||
final double max = test.maxStatePercent(state);
|
||||
final double obs = factory.getStatePercent(state);
|
||||
// logger.warn(" Checking " + state
|
||||
// + " min " + String.format("%.2f", min)
|
||||
// + " max " + String.format("%.2f", max)
|
||||
|
|
@ -170,6 +175,6 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
|
|||
Assert.assertEquals(factory.getNThreadsCreated(), test.getNStates());
|
||||
|
||||
// should be called to ensure we don't format / NPE on output
|
||||
factory.printUsageInformation(logger, Priority.INFO);
|
||||
factory.printUsageInformation(logger, Priority.WARN);
|
||||
}
|
||||
}
|
||||
|
|
@ -750,6 +750,10 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
modified = new VariantContextBuilder(modified).attributes(null).attribute("AC", 1).make();
|
||||
Assert.assertEquals(modified.getAttribute("AC"), 1);
|
||||
|
||||
// test the behavior when the builder's attribute object is not initialized
|
||||
modified = new VariantContextBuilder(modified.getSource(), modified.getChr(), modified.getStart(), modified.getEnd(), modified.getAlleles()).attribute("AC", 1).make();
|
||||
|
||||
// test normal attribute modification
|
||||
modified = new VariantContextBuilder(cfg.vc).attribute("AC", 1).make();
|
||||
Assert.assertEquals(modified.getAttribute("AC"), 1);
|
||||
modified = new VariantContextBuilder(modified).attribute("AC", 2).make();
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ my %ref_order;
|
|||
my $n = 0;
|
||||
while ( <DICT> ) {
|
||||
chomp;
|
||||
my ($contig, $rest) = split "\t";
|
||||
my ($contig, $rest) = split '\s';
|
||||
die("Dictionary file is probably corrupt: multiple instances of contig $contig") if ( defined $ref_order{$contig} );
|
||||
|
||||
$ref_order{$contig} = $n;
|
||||
|
|
|
|||
Loading…
Reference in New Issue