Merge remote-tracking branch 'unstable/master' into develop

This commit is contained in:
Mauricio Carneiro 2012-10-27 13:50:03 -04:00
commit 468640476f
88 changed files with 1468 additions and 513 deletions

View File

@ -0,0 +1,197 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.PrintStream;
import java.util.*;
public class AlleleBiasedDownsamplingUtils {
/**
* Computes an allele biased version of the given pileup
*
* @param pileup the original pileup
* @param downsamplingFraction the fraction of total reads to remove per allele
* @param log logging output
* @return allele biased pileup
*/
public static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) {
// special case removal of all or no reads
if ( downsamplingFraction <= 0.0 )
return pileup;
if ( downsamplingFraction >= 1.0 )
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());
final ArrayList<PileupElement>[] alleleStratifiedElements = new ArrayList[4];
for ( int i = 0; i < 4; i++ )
alleleStratifiedElements[i] = new ArrayList<PileupElement>();
// start by stratifying the reads by the alleles they represent at this position
for( final PileupElement pe : pileup ) {
// abort if we have a reduced read - we do not want to remove it!
if ( pe.getRead().isReducedRead() )
return pileup;
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
if ( baseIndex != -1 )
alleleStratifiedElements[baseIndex].add(pe);
}
// Down-sample *each* allele by the contamination fraction applied to the entire pileup.
// Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later.
int numReadsToRemove = (int)(pileup.getNumberOfElements() * downsamplingFraction); // floor
final TreeSet<PileupElement> elementsToKeep = new TreeSet<PileupElement>(new Comparator<PileupElement>() {
@Override
public int compare(PileupElement element1, PileupElement element2) {
final int difference = element1.getRead().getAlignmentStart() - element2.getRead().getAlignmentStart();
return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
}
});
for ( int i = 0; i < 4; i++ ) {
final ArrayList<PileupElement> alleleList = alleleStratifiedElements[i];
if ( alleleList.size() <= numReadsToRemove )
logAllElements(alleleList, log);
else
elementsToKeep.addAll(downsampleElements(alleleList, numReadsToRemove, log));
}
// clean up pointers so memory can be garbage collected if needed
for ( int i = 0; i < 4; i++ )
alleleStratifiedElements[i].clear();
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>(elementsToKeep));
}
/**
* Performs allele biased down-sampling on a pileup and computes the list of elements to keep
*
* @param elements original list of records
* @param numElementsToRemove the number of records to remove
* @param log logging output
* @return the list of pileup elements TO KEEP
*/
private static List<PileupElement> downsampleElements(final ArrayList<PileupElement> elements, final int numElementsToRemove, final PrintStream log) {
final int pileupSize = elements.size();
final BitSet itemsToRemove = new BitSet(pileupSize);
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
itemsToRemove.set(selectedIndex);
}
ArrayList<PileupElement> elementsToKeep = new ArrayList<PileupElement>(pileupSize - numElementsToRemove);
for ( int i = 0; i < pileupSize; i++ ) {
if ( itemsToRemove.get(i) )
logRead(elements.get(i).getRead(), log);
else
elementsToKeep.add(elements.get(i));
}
return elementsToKeep;
}
/**
* Computes reads to remove based on an allele biased down-sampling
*
* @param alleleReadMap original list of records per allele
* @param downsamplingFraction the fraction of total reads to remove per allele
* @param log logging output
* @return list of reads TO REMOVE from allele biased down-sampling
*/
public static List<GATKSAMRecord> selectAlleleBiasedReads(final Map<Allele, List<GATKSAMRecord>> alleleReadMap, final double downsamplingFraction, final PrintStream log) {
int totalReads = 0;
for ( final List<GATKSAMRecord> reads : alleleReadMap.values() )
totalReads += reads.size();
// Down-sample *each* allele by the contamination fraction applied to the entire pileup.
int numReadsToRemove = (int)(totalReads * downsamplingFraction);
final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numReadsToRemove * alleleReadMap.size());
for ( final List<GATKSAMRecord> reads : alleleReadMap.values() ) {
if ( reads.size() <= numReadsToRemove ) {
readsToRemove.addAll(reads);
logAllReads(reads, log);
} else {
readsToRemove.addAll(downsampleReads(reads, numReadsToRemove, log));
}
}
return readsToRemove;
}
/**
* Performs allele biased down-sampling on a pileup and computes the list of elements to remove
*
* @param reads original list of records
* @param numElementsToRemove the number of records to remove
* @param log logging output
* @return the list of pileup elements TO REMOVE
*/
private static List<GATKSAMRecord> downsampleReads(final List<GATKSAMRecord> reads, final int numElementsToRemove, final PrintStream log) {
final int pileupSize = reads.size();
final BitSet itemsToRemove = new BitSet(pileupSize);
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
itemsToRemove.set(selectedIndex);
}
ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(pileupSize - numElementsToRemove);
for ( int i = 0; i < pileupSize; i++ ) {
if ( itemsToRemove.get(i) ) {
final GATKSAMRecord read = reads.get(i);
readsToRemove.add(read);
logRead(read, log);
}
}
return readsToRemove;
}
private static void logAllElements(final List<PileupElement> elements, final PrintStream log) {
if ( log != null ) {
for ( final PileupElement p : elements )
logRead(p.getRead(), log);
}
}
private static void logAllReads(final List<GATKSAMRecord> reads, final PrintStream log) {
if ( log != null ) {
for ( final GATKSAMRecord read : reads )
logRead(read, log);
}
}
private static void logRead(final SAMRecord read, final PrintStream log) {
if ( log != null ) {
final SAMReadGroupRecord readGroup = read.getReadGroup();
log.println(String.format("%s\t%s\t%s\t%s", read.getReadName(), readGroup.getSample(), readGroup.getLibrary(), readGroup.getPlatformUnit()));
}
}
}

View File

@ -28,26 +28,22 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.threading.ThreadLocalArray;
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
// optimizations: don't reallocate an array each time
private byte[] tempQualArray;
private boolean[] tempErrorArray;
private double[] tempFractionalErrorArray;
// optimization: only allocate temp arrays once per thread
private final ThreadLocal<byte[]> threadLocalTempQualArray = new ThreadLocalArray<byte[]>(EventType.values().length, byte.class);
private final ThreadLocal<boolean[]> threadLocalTempErrorArray = new ThreadLocalArray<boolean[]>(EventType.values().length, boolean.class);
private final ThreadLocal<double[]> threadLocalTempFractionalErrorArray = new ThreadLocalArray<double[]>(EventType.values().length, double.class);
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
super.initialize(covariates, recalibrationTables);
tempQualArray = new byte[EventType.values().length];
tempErrorArray = new boolean[EventType.values().length];
tempFractionalErrorArray = new double[EventType.values().length];
}
/**
@ -60,10 +56,13 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
* @param refBase The reference base at this locus
*/
@Override
public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
final int offset = pileupElement.getOffset();
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
byte[] tempQualArray = threadLocalTempQualArray.get();
boolean[] tempErrorArray = threadLocalTempErrorArray.get();
tempQualArray[EventType.BASE_SUBSTITUTION.index] = pileupElement.getQual();
tempErrorArray[EventType.BASE_SUBSTITUTION.index] = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase);
tempQualArray[EventType.BASE_INSERTION.index] = pileupElement.getBaseInsertionQual();
@ -77,40 +76,29 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
final byte qual = tempQualArray[eventIndex];
final boolean isError = tempErrorArray[eventIndex];
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getReadGroupTable();
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);
// TODO: should this really be combine rather than increment?
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getQualityScoreTable();
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
if (qualPreviousDatum == null)
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
else
qualPreviousDatum.increment(isError);
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
if (covPreviousDatum == null)
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
else
covPreviousDatum.increment(isError);
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
}
}
}
@Override
public synchronized void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
if( !skip[offset] ) {
final ReadCovariates readCovariates = covariateKeySetFrom(read);
byte[] tempQualArray = threadLocalTempQualArray.get();
double[] tempFractionalErrorArray = threadLocalTempFractionalErrorArray.get();
tempQualArray[EventType.BASE_SUBSTITUTION.index] = read.getBaseQualities()[offset];
tempFractionalErrorArray[EventType.BASE_SUBSTITUTION.index] = snpErrors[offset];
tempQualArray[EventType.BASE_INSERTION.index] = read.getBaseInsertionQualities()[offset];
@ -124,30 +112,15 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
final byte qual = tempQualArray[eventIndex];
final double isError = tempFractionalErrorArray[eventIndex];
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getReadGroupTable();
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getQualityScoreTable();
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
if (qualPreviousDatum == null)
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
else
qualPreviousDatum.increment(1.0, isError);
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
if (covPreviousDatum == null)
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
else
covPreviousDatum.increment(1.0, isError);
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
}
}
}

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -60,7 +60,7 @@ public class ErrorModel {
boolean hasCalledAlleles = false;
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
if (refSampleVC != null) {
for (Allele allele : refSampleVC.getAlleles()) {

View File

@ -195,7 +195,7 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
final List<Allele> allAllelesToUse,
final boolean useBAQedPileup,
final GenomeLocParser locParser,
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
HashMap<String, ErrorModel> perLaneErrorModels = getPerLaneErrorModels(tracker, ref, contexts);
if (perLaneErrorModels == null && UAC.referenceSampleName != null)
@ -231,7 +231,7 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
if (!perReadAlleleLikelihoodMap.containsKey(sample.getKey())){
// no likelihoods have been computed for this sample at this site
perReadAlleleLikelihoodMap.put(sample.getKey(), new PerReadAlleleLikelihoodMap());
perReadAlleleLikelihoodMap.put(sample.getKey(), org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap());
}
// create the GenotypeLikelihoods object
@ -333,7 +333,7 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation,
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap);
final org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap);
protected abstract List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
final ReferenceContext ref,

View File

@ -27,7 +27,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
double[][] readHaplotypeLikelihoods;
final byte refBase;
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap;
final org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap;
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
final double[] logLikelihoods,
@ -37,7 +37,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
final PairHMMIndelErrorModel pairModel,
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
final ReferenceContext referenceContext,
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
final org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.pairModel = pairModel;
this.haplotypeMap = haplotypeMap;

View File

@ -74,7 +74,7 @@ public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends Gener
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation,
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
final org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref, perReadAlleleLikelihoodMap);
}

View File

@ -50,7 +50,7 @@ public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends General
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation,
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
final org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
}

View File

@ -217,7 +217,7 @@ public class GenotypingEngine {
}
cleanUpSymbolicUnassembledEvents( haplotypes );
if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 3 ) { // if not in GGA mode and have at least 3 samples try to create MNP and complex events by looking at LD structure
if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc );
}
if( !activeAllelesToGenotype.isEmpty() ) { // we are in GGA mode!

View File

@ -51,6 +51,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pairhmm.PairHMM;
import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -424,7 +425,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());

View File

@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.*;
public class LikelihoodCalculationEngine {
@ -139,7 +140,7 @@ public class LikelihoodCalculationEngine {
}
private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) {
for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ){
for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ) {
if( b1[iii] != b2[iii] ) {
return iii;
}
@ -346,11 +347,13 @@ public class LikelihoodCalculationEngine {
public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser,
final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList,
final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call,
final double downsamplingFraction,
final PrintStream downsamplingLog ) {
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
final PerReadAlleleLikelihoodMap likelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
@ -370,6 +373,9 @@ public class LikelihoodCalculationEngine {
}
}
// down-sample before adding filtered reads
likelihoodMap.performPerAlleleDownsampling(downsamplingFraction, downsamplingLog);
// add all filtered reads to the NO_CALL list because they weren't given any likelihoods
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.genotyper;
import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.PrintStream;
import java.util.*;
public class AdvancedPerReadAlleleLikelihoodMap extends StandardPerReadAlleleLikelihoodMap implements ProtectedPackageSource {
public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) {
return AlleleBiasedDownsamplingUtils.createAlleleBiasedBasePileup(pileup, downsamplingFraction, log);
}
public void performPerAlleleDownsampling(final double downsamplingFraction, final PrintStream log) {
// special case removal of all or no reads
if ( downsamplingFraction <= 0.0 )
return;
if ( downsamplingFraction >= 1.0 ) {
likelihoodReadMap.clear();
return;
}
// start by stratifying the reads by the alleles they represent at this position
final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>(alleles.size());
for ( Allele allele : alleles )
alleleReadMap.put(allele, new ArrayList<GATKSAMRecord>());
for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : likelihoodReadMap.entrySet() ) {
// do not remove reduced reads!
if ( !entry.getKey().isReducedRead() ) {
final Allele bestAllele = getMostLikelyAllele(entry.getValue());
if ( bestAllele != Allele.NO_CALL )
alleleReadMap.get(bestAllele).add(entry.getKey());
}
}
// compute the reads to remove and actually remove them
final List<GATKSAMRecord> readsToRemove = AlleleBiasedDownsamplingUtils.selectAlleleBiasedReads(alleleReadMap, downsamplingFraction, log);
for ( final GATKSAMRecord read : readsToRemove )
likelihoodReadMap.remove(read);
}
}

View File

@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* @author ebanks
@ -73,11 +75,6 @@ public class BQSRIntegrationTest extends WalkerTest {
params.getCommandLine(),
Arrays.asList(params.md5));
executeTest("testBQSR-"+params.args, spec).getFirst();
WalkerTestSpec specNT2 = new WalkerTestSpec(
params.getCommandLine() + " -nt 2",
Arrays.asList(params.md5));
executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
}
@Test
@ -123,20 +120,26 @@ public class BQSRIntegrationTest extends WalkerTest {
@DataProvider(name = "PRTest")
public Object[][] createPRTestData() {
return new Object[][]{
{new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")},
{new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")},
{new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")},
{new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")}
};
List<Object[]> tests = new ArrayList<Object[]>();
tests.add(new Object[]{1, new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")});
tests.add(new Object[]{1, new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")});
tests.add(new Object[]{1, new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")});
for ( final int nct : Arrays.asList(1, 2, 4) ) {
tests.add(new Object[]{nct, new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")});
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "PRTest")
public void testPR(PRTest params) {
public void testPR(final int nct, PRTest params) {
WalkerTestSpec spec = new WalkerTestSpec(
"-T PrintReads" +
" -R " + hg18Reference +
" -I " + privateTestDir + "HiSeq.1mb.1RG.bam" +
" -nct " + nct +
" -BQSR " + privateTestDir + "HiSeq.20mb.1RG.table" +
params.args +
" -o %s",

View File

@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testSNP_ACS_Pools() {
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","ec19f0b7c7d57493cecfff988a4815c8");
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705");
}
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","9ce24f4ff787aed9d3754519a60ef49f");
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","7e5b28c9e21cc7e45c58c41177d8a0fc");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","492c8ba9a80a902097ff15bbeb031592");
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ae6c276cc46785a794acff6f7d10ecf7");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","848e1092b5cd57b0da5f1187e67134e7");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","481452ad7d6378cffb5cd834cc621d55");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","51a7b51d82a341adec0e6510f5dfadd8");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","812957e51277aca9925c1a7bb4d9a118");
}
@Test(enabled = true)
public void testMT_SNP_DISCOVERY_sp4() {
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","0a8c3b06243040b743dd90d497bb3f83");
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","dd568dc30be90135a3a8957a45a7321c");
}
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "f8ea18ec6a717a77fdf8c5f2482d8d8d");
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "bf793c43b635a931207170be8035b288");
}
}

View File

@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "ee866a8694a6f6c77242041275350ab9");
HCTest(CEUTRIO_BAM, "", "aa1df35d6e64d7ca93feb4d2dd15dd0e");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "01367428c26d3eaf9297c58bf8677dd3");
HCTest(NA12878_BAM, "", "186c7f322978283c01249c6de2829215");
}
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "53caa950535749f99d3c5b9bb61c7b60");
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "de9e78a52207fe62144dba5337965469");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(CEUTRIO_BAM, "", "30598abeeb0b0ae5816ffdbf0c4044fd");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "000dbb1b48f94d017cfec127c6cabe8f");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleSymbolic() {
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "b4ea70a446e4782bd3700ca14dd726ff");
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
@ -64,20 +64,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "b369c2a6cb5c99a424551b33bae16f3b");
}
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c54c0c9411054bf629bfd98b616e53fc"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c642dcd93771f6f084d55de31f180d1b"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6c67ee8e99cc8f53a6587bb26028047"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -91,9 +91,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("79af83432dc4a1768b3ebffffc4d2b8f"));
Arrays.asList("4beb9f87ab3f316a9384c3d0dca6ebe9"));
executeTest("HC calling on a ReducedRead BAM", spec);
}
}

View File

@ -406,7 +406,7 @@ public abstract class ArgumentTypeDescriptor {
else
throw new UserException.CommandLineException(
String.format("Failed to parse value %s for argument %s. Message: %s",
value.asString(), fieldName, e.getMessage()));
value, fieldName, e.getMessage()));
}
}
}

View File

@ -64,6 +64,7 @@ import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
import java.io.File;
import java.util.*;
import java.util.concurrent.TimeUnit;
/**
* A GenomeAnalysisEngine that runs a specified walker.
@ -73,6 +74,7 @@ public class GenomeAnalysisEngine {
* our log, which we want to capture anything from this class
*/
private static Logger logger = Logger.getLogger(GenomeAnalysisEngine.class);
public static final long NO_RUNTIME_LIMIT = -1;
/**
* The GATK command-line argument parsing code.
@ -1090,6 +1092,33 @@ public class GenomeAnalysisEngine {
public String createApproximateCommandLineArgumentString(Object... argumentProviders) {
return CommandLineUtils.createApproximateCommandLineArgumentString(parsingEngine,argumentProviders);
}
/**
* Does the current runtime in unit exceed the runtime limit, if one has been provided?
*
* @param runtime the runtime of this GATK instance in minutes
* @param unit the time unit of runtime
* @return false if not limit was requested or if runtime <= the limit, true otherwise
*/
public boolean exceedsRuntimeLimit(final long runtime, final TimeUnit unit) {
if ( runtime < 0 ) throw new IllegalArgumentException("runtime must be >= 0 but got " + runtime);
if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT )
return false;
else {
final long actualRuntimeNano = TimeUnit.NANOSECONDS.convert(runtime, unit);
final long maxRuntimeNano = getRuntimeLimitInNanoseconds();
return actualRuntimeNano > maxRuntimeNano;
}
}
/**
* @return the runtime limit in nanoseconds, or -1 if no limit was specified
*/
public long getRuntimeLimitInNanoseconds() {
if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT )
return -1;
else
return TimeUnit.NANOSECONDS.convert(getArguments().maxRuntime, getArguments().maxRuntimeUnits);
}
}

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
@ -44,6 +45,7 @@ import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
* @author aaron
@ -91,7 +93,7 @@ public class GATKArgumentCollection {
// --------------------------------------------------------------------------------------------------------------
//
// XXX
// General features
//
// --------------------------------------------------------------------------------------------------------------
@ -143,6 +145,12 @@ public class GATKArgumentCollection {
@Argument(fullName = "disableRandomization",doc="Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator.")
public boolean disableRandomization = false;
@Argument(fullName = "maxRuntime", shortName = "maxRuntime", doc="If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure. By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits", required = false)
public long maxRuntime = GenomeAnalysisEngine.NO_RUNTIME_LIMIT;
@Argument(fullName = "maxRuntimeUnits", shortName = "maxRuntimeUnits", doc="The TimeUnit for maxRuntime", required = false)
public TimeUnit maxRuntimeUnits = TimeUnit.MINUTES;
// --------------------------------------------------------------------------------------------------------------
//
// Downsampling Arguments

View File

@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.io.PrintStream;
/**
* Created with IntelliJ IDEA.
@ -63,26 +64,24 @@ public class StandardCallerArgumentCollection {
public int MAX_ALTERNATE_ALLELES = 6;
/**
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
* that you not play around with this parameter.
*
* This argument has been retired in GATK 2.2. Please specify just maxAltAlleles from now on
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
*/
@Deprecated
@Hidden
@Argument(fullName = "max_alternate_alleles_for_indels", shortName = "maxAltAllelesForIndels", doc = "This argument has been retired in GATK 2.2. Please specify just maxAltAlleles from now on, which will apply to any variant, regardless of type", required = false)
public int MAX_ALTERNATE_ALLELES_FOR_INDELS = -1;
@Advanced
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel();
/**
* If this fraction is greater is than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads.
* Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we
* will try to remove (N * contamination fraction) bases for each alternate allele.
*/
@Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false)
public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION;
public static final double DEFAULT_CONTAMINATION_FRACTION = 0.05;
@Hidden
@Argument(fullName = "contamination_percentage_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false)
public double CONTAMINATION_PERCENTAGE = 0.0;
@Argument(fullName = "logRemovedReadsFromContaminationFiltering", shortName="contaminationLog", required=false)
public PrintStream contaminationLog = null;
@Hidden
@Argument(shortName = "logExactCalls", doc="x", required=false)
@ -96,19 +95,12 @@ public class StandardCallerArgumentCollection {
this.GenotypingMode = SCAC.GenotypingMode;
this.heterozygosity = SCAC.heterozygosity;
this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
this.MAX_ALTERNATE_ALLELES_FOR_INDELS = SCAC.MAX_ALTERNATE_ALLELES_FOR_INDELS;
this.OutputMode = SCAC.OutputMode;
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
this.CONTAMINATION_PERCENTAGE = SCAC.CONTAMINATION_PERCENTAGE;
this.CONTAMINATION_FRACTION = SCAC.CONTAMINATION_FRACTION;
this.contaminationLog = SCAC.contaminationLog;
this.exactCallsLog = SCAC.exactCallsLog;
this.AFmodel = SCAC.AFmodel;
}
/**
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
*/
@Advanced
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel();
}

View File

@ -123,7 +123,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
final ReduceTree reduceTree = new ReduceTree(this);
initializeWalker(walker);
while (isShardTraversePending() || isTreeReducePending()) {
while (! abortExecution() && (isShardTraversePending() || isTreeReducePending())) {
// Check for errors during execution.
errorTracker.throwErrorIfPending();

View File

@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler {
final TraversalEngine traversalEngine = borrowTraversalEngine(this);
for (Shard shard : shardStrategy ) {
if ( done || shard == null ) // we ran out of shards that aren't owned
if ( abortExecution() || done || shard == null ) // we ran out of shards that aren't owned
break;
if(shard.getShardType() == Shard.ShardType.LOCUS) {

View File

@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -52,6 +53,7 @@ import javax.management.ObjectName;
import java.io.File;
import java.lang.management.ManagementFactory;
import java.util.*;
import java.util.concurrent.TimeUnit;
/**
@ -269,6 +271,26 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
this.threadEfficiencyMonitor = threadEfficiencyMonitor;
}
/**
* Should we stop all execution work and exit gracefully?
*
* Returns true in the case where some external signal or time limit has been received, indicating
* that this GATK shouldn't continue executing. This isn't a kill signal, it is really a "shutdown
* gracefully at the next opportunity" signal. Concrete implementations of the MicroScheduler
* examine this value as often as reasonable and, if it returns true, stop what they are doing
* at the next available opportunity, shutdown their resources, call notify done, and return.
*
* @return true if we should abort execution, or false otherwise
*/
protected boolean abortExecution() {
final boolean abort = engine.exceedsRuntimeLimit(progressMeter.getRuntimeInNanoseconds(), TimeUnit.NANOSECONDS);
if ( abort ) {
final AutoFormattingTime aft = new AutoFormattingTime(TimeUnit.SECONDS.convert(engine.getRuntimeLimitInNanoseconds(), TimeUnit.NANOSECONDS), 1, 4);
logger.info("Aborting execution (cleanly) because the runtime has exceeded the requested maximum " + aft);
}
return abort;
}
/**
* Walks a walker over the given list of intervals.
*

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;

View File

@ -36,7 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -1,14 +1,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.*;

View File

@ -34,13 +34,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -32,8 +32,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;

View File

@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -8,12 +8,10 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.IndelUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@ -9,7 +9,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -1,14 +1,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;

View File

@ -7,14 +7,13 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -7,11 +7,9 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;

View File

@ -7,16 +7,14 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;

View File

@ -7,15 +7,13 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MannWhitneyU;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;

View File

@ -5,7 +5,7 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;

View File

@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;

View File

@ -1,9 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.List;

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;

View File

@ -109,7 +109,7 @@ import java.util.ArrayList;
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long>, NanoSchedulable {
public class BaseRecalibrator extends LocusWalker<Long, Long> {
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
@ -277,11 +277,6 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
return sum;
}
public Long treeReduce(Long sum1, Long sum2) {
sum1 += sum2;
return sum1;
}
@Override
public void onTraversalDone(Long result) {
logger.info("Calculating quantized quality scores...");

View File

@ -55,7 +55,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
* @param refBase The reference base at this locus
*/
@Override
public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
final int offset = pileupElement.getOffset();
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
@ -65,35 +65,21 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getReadGroupTable();
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);
// TODO: should this really be combine rather than increment?
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getQualityScoreTable();
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
if (qualPreviousDatum == null)
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
else
qualPreviousDatum.increment(isError);
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
if (covPreviousDatum == null)
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
else
covPreviousDatum.increment(isError);
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
}
}
@Override
public synchronized void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
public void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
throw new UnsupportedOperationException("Delocalized BQSR is not available in the GATK-lite version");
}
@ -121,4 +107,133 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) {
return (ReadCovariates) read.getTemporaryAttribute(BaseRecalibrator.COVARS_ATTRIBUTE);
}
/**
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
* if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error status for this event
* @param keys location in table of our item
*/
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final boolean isError, final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
// There are three cases here:
//
// 1. The original get succeeded (existingDatum != null) because there already was an item in this position.
// In this case we can just increment the existing item.
//
// 2. The original get failed (existingDatum == null), and we successfully put a new item in this position
// and are done.
//
// 3. The original get failed (existingDatum == null), AND the put fails because another thread comes along
// in the interim and puts an item in this position. In this case we need to do another get after the
// failed put to get the item the other thread put here and increment it.
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
table.get(keys).increment(isError);
}
}
else {
// Easy case: already an item here, so increment it
existingDatum.increment(isError);
}
}
/**
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
* if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error value for this event
* @param keys location in table of our item
*/
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final double isError, final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
table.get(keys).increment(1.0, isError);
}
}
else {
// Easy case: already an item here, so increment it
existingDatum.increment(1.0, isError);
}
}
/**
* Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a
* new item there if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error status for this event
* @param keys location in table of our item
*/
protected void combineDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final boolean isError, final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
final RecalDatum newDatum = createDatumObject(qual, isError);
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(newDatum, keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and combine it with our item (item is guaranteed to exist at this point)
table.get(keys).combine(newDatum);
}
}
else {
// Easy case: already an item here, so combine it with our item
existingDatum.combine(newDatum);
}
}
/**
* Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a
* new item there if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error value for this event
* @param keys location in table of our item
*/
protected void combineDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final double isError, final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
final RecalDatum newDatum = createDatumObject(qual, isError);
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(newDatum, keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and combine it with our item (item is guaranteed to exist at this point)
table.get(keys).combine(newDatum);
}
}
else {
// Easy case: already an item here, so combine it with our item
existingDatum.combine(newDatum);
}
}
}

View File

@ -104,7 +104,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
final List<Allele> allAllelesToUse,
final boolean useBAQedPileup,
final GenomeLocParser locParser,
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap);
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap);
protected int getFilteredDepth(ReadBackedPileup pileup) {

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -81,7 +82,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
final List<Allele> allAllelesToUse,
final boolean useBAQedPileup,
final GenomeLocParser locParser,
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
GenomeLoc loc = ref.getLocus();
// if (!ref.getLocus().equals(lastSiteVisited)) {
@ -118,12 +119,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (!perReadAlleleLikelihoodMap.containsKey(sample.getKey())){
// no likelihoods have been computed for this sample at this site
perReadAlleleLikelihoodMap.put(sample.getKey(), new PerReadAlleleLikelihoodMap());
perReadAlleleLikelihoodMap.put(sample.getKey(), PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap());
}
final ReadBackedPileup pileup = context.getBasePileup();
if (pileup != null) {
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()));
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.CONTAMINATION_FRACTION, UAC.contaminationLog);
b.PL(genotypeLikelihoods);
b.DP(getFilteredDepth(pileup));
genotypes.add(b.make());

View File

@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
@ -48,13 +49,13 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
private final boolean useAlleleFromVCF;
private final double[] likelihoodSums = new double[4];
private final ArrayList<PileupElement>[] alleleStratifiedElements = new ArrayList[4];
private final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap;
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
for ( int i = 0; i < 4; i++ )
alleleStratifiedElements[i] = new ArrayList<PileupElement>();
perReadAlleleLikelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
}
public VariantContext getLikelihoods(final RefMetaDataTracker tracker,
@ -64,9 +65,9 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
final List<Allele> allAllelesToUse,
final boolean useBAQedPileup,
final GenomeLocParser locParser,
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final Map<String, PerReadAlleleLikelihoodMap> sampleLikelihoodMap) {
perReadAlleleLikelihoodMap.clear(); // not used in SNP model, sanity check to delete any older data
sampleLikelihoodMap.clear(); // not used in SNP model, sanity check to delete any older data
final byte refBase = ref.getBase();
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
@ -79,8 +80,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
ArrayList<SampleGenotypeData> GLs = new ArrayList<SampleGenotypeData>(contexts.size());
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
if ( UAC.CONTAMINATION_PERCENTAGE > 0.0 )
pileup = createDecontaminatedPileup(pileup, UAC.CONTAMINATION_PERCENTAGE);
if ( UAC.CONTAMINATION_FRACTION > 0.0 )
pileup = perReadAlleleLikelihoodMap.createPerAlleleDownsampledBasePileup(pileup, UAC.CONTAMINATION_FRACTION, UAC.contaminationLog);
if ( useBAQedPileup )
pileup = createBAQedPileup(pileup);
@ -203,42 +204,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
return allelesToUse;
}
public ReadBackedPileup createDecontaminatedPileup(final ReadBackedPileup pileup, final double contaminationPercentage) {
// special case removal of all reads
if ( contaminationPercentage >= 1.0 )
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());
// start by stratifying the reads by the alleles they represent at this position
for( final PileupElement pe : pileup ) {
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
if ( baseIndex != -1 )
alleleStratifiedElements[baseIndex].add(pe);
}
// Down-sample *each* allele by the contamination fraction applied to the entire pileup.
// Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later.
int numReadsToRemove = (int)Math.ceil((double)pileup.getNumberOfElements() * contaminationPercentage);
final TreeSet<PileupElement> elementsToKeep = new TreeSet<PileupElement>(new Comparator<PileupElement>() {
@Override
public int compare(PileupElement element1, PileupElement element2) {
final int difference = element1.getRead().getAlignmentStart() - element2.getRead().getAlignmentStart();
return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
}
});
for ( int i = 0; i < 4; i++ ) {
final ArrayList<PileupElement> alleleList = alleleStratifiedElements[i];
if ( alleleList.size() > numReadsToRemove )
elementsToKeep.addAll(downsampleElements(alleleList, numReadsToRemove));
}
// clean up pointers so memory can be garbage collected if needed
for ( int i = 0; i < 4; i++ )
alleleStratifiedElements[i].clear();
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>(elementsToKeep));
}
public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) {
final List<PileupElement> BAQedElements = new ArrayList<PileupElement>();
for( final PileupElement PE : pileup ) {
@ -257,22 +222,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); }
}
private List<PileupElement> downsampleElements(final ArrayList<PileupElement> elements, final int numElementsToRemove) {
final int pileupSize = elements.size();
final BitSet itemsToRemove = new BitSet(pileupSize);
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
itemsToRemove.set(selectedIndex);
}
ArrayList<PileupElement> elementsToKeep = new ArrayList<PileupElement>(pileupSize - numElementsToRemove);
for ( int i = 0; i < pileupSize; i++ ) {
if ( !itemsToRemove.get(i) )
elementsToKeep.add(elements.get(i));
}
return elementsToKeep;
}
private static class SampleGenotypeData {
public final String name;

View File

@ -47,8 +47,8 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
/**
* Note that calculating the SLOD increases the runtime by an appreciable amount.
*/
@Argument(fullName = "noSLOD", shortName = "nosl", doc = "If provided, we will not calculate the SLOD", required = false)
public boolean NO_SLOD = false;
@Argument(fullName = "computeSLOD", shortName = "slod", doc = "If provided, we will calculate the SLOD (SB annotation)", required = false)
public boolean COMPUTE_SLOD = false;
/**
* Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping.
@ -204,7 +204,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
this.GLmodel = uac.GLmodel;
this.AFmodel = uac.AFmodel;
this.PCR_error = uac.PCR_error;
this.NO_SLOD = uac.NO_SLOD;
this.COMPUTE_SLOD = uac.COMPUTE_SLOD;
this.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED;
this.MIN_BASE_QUALTY_SCORE = uac.MIN_BASE_QUALTY_SCORE;
this.MAX_DELETION_FRACTION = uac.MAX_DELETION_FRACTION;

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
@ -234,36 +235,26 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY ||
UAC.referenceSampleName != null ||
UAC.referenceSampleRod.isBound()) {
throw new UserException.NotSupportedInGATKLite("Usage of ploidy values different than 2 not supported in this GATK version");
throw new UserException.NotSupportedInGATKLite("you cannot enable usage of ploidy values other than 2");
}
if ( UAC.CONTAMINATION_FRACTION > 0.0 ) {
if ( UAC.CONTAMINATION_FRACTION == StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION ) {
UAC.CONTAMINATION_FRACTION = 0.0;
logger.warn("setting contamination down-sampling fraction to 0.0 because it is not enabled in GATK-lite");
} else {
throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling");
}
}
}
if ( UAC.TREAT_ALL_READS_AS_SINGLE_POOL ) {
samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
} else {
// get all of the unique sample names
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
} else {
// in full mode: check for consistency in ploidy/pool calling arguments
// check for correct calculation models
/* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
// polyploidy requires POOL GL and AF calculation models to be specified right now
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
throw new UserException("Incorrect genotype calculation model chosen. Only [POOLSNP|POOLINDEL|POOLBOTH] supported with this walker if sample ploidy != 2");
}
if (UAC.AFmodel != AFCalc.Model.POOL)
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
}
*/
// get all of the unique sample names
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
samples.clear();
samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
} else {
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
if (UAC.referenceSampleName != null )
samples.remove(UAC.referenceSampleName);
}
if ( UAC.referenceSampleName != null )
samples.remove(UAC.referenceSampleName);
}
// check for a bad max alleles value
@ -305,7 +296,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
// annotation (INFO) fields from UnifiedGenotyper
if ( !UAC.NO_SLOD )
if ( UAC.COMPUTE_SLOD )
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.STRAND_BIAS_KEY);
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )

View File

@ -153,19 +153,19 @@ public class UnifiedGenotyperEngine {
}
/**
* Compute full calls at a given locus. Entry point for engine calls from the UnifiedGenotyper.
*
* If allSamples != null, then the output variantCallContext is guarenteed to contain a genotype
* for every sample in allSamples. If it's null there's no such guarentee. Providing this
* argument is critical when the resulting calls will be written to a VCF file.
*
* @param tracker the meta data tracker
* @param refContext the reference base
* @param rawContext contextual information around the locus
* @param allSamples set of all sample names that we might call (i.e., those in the VCF header)
* @return the VariantCallContext object
*/
/**
* Compute full calls at a given locus. Entry point for engine calls from the UnifiedGenotyper.
*
* If allSamples != null, then the output variantCallContext is guarenteed to contain a genotype
* for every sample in allSamples. If it's null there's no such guarentee. Providing this
* argument is critical when the resulting calls will be written to a VCF file.
*
* @param tracker the meta data tracker
* @param refContext the reference base
* @param rawContext contextual information around the locus
* @param allSamples set of all sample names that we might call (i.e., those in the VCF header)
* @return the VariantCallContext object
*/
public List<VariantCallContext> calculateLikelihoodsAndGenotypes(final RefMetaDataTracker tracker,
final ReferenceContext refContext,
final AlignmentContext rawContext,
@ -174,7 +174,7 @@ public class UnifiedGenotyperEngine {
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap = new HashMap<String,PerReadAlleleLikelihoodMap>();
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap = new HashMap<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap>();
if ( models.isEmpty() ) {
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
@ -209,7 +209,7 @@ public class UnifiedGenotyperEngine {
public VariantContext calculateLikelihoods(final RefMetaDataTracker tracker,
final ReferenceContext refContext,
final AlignmentContext rawContext,
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
if ( models.isEmpty() ) {
return null;
@ -275,7 +275,7 @@ public class UnifiedGenotyperEngine {
final List<Allele> alternateAllelesToUse,
final boolean useBAQedPileup,
final GenotypeLikelihoodsCalculationModel.Model model,
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
// initialize the data for this thread if that hasn't been done yet
if ( glcm.get() == null ) {
@ -313,7 +313,7 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vc, false);
}
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model, final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model, final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
return calculateGenotypes(null, null, null, null, vc, model, perReadAlleleLikelihoodMap);
}
@ -327,7 +327,7 @@ public class UnifiedGenotyperEngine {
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final GenotypeLikelihoodsCalculationModel.Model model,
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false,perReadAlleleLikelihoodMap);
}
@ -346,7 +346,7 @@ public class UnifiedGenotyperEngine {
final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model,
final boolean inheritAttributesFromInputVC,
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
@ -451,7 +451,7 @@ public class UnifiedGenotyperEngine {
attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
}
if ( !UAC.NO_SLOD && !limitedContext && !bestGuessIsRef ) {
if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) {
//final boolean DEBUG_SLOD = false;
// the overall lod

View File

@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
import com.google.java.contract.Ensures;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
@ -41,8 +41,8 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
@ -188,11 +188,14 @@ public class PairHMMIndelErrorModel {
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
final ReferenceContext ref,
final int eventLength,
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap,
final double downsamplingFraction,
final PrintStream downsamplingLog) {
final int numHaplotypes = haplotypeMap.size();
final int readCounts[] = new int[pileup.getNumberOfElements()];
final double[][] readLikelihoods = computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap, readCounts);
perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction, downsamplingLog);
return getDiploidHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
}

View File

@ -438,8 +438,6 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1);
normalSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeaders().get(0));
try {
// we already checked that bedOutput and output_file are not set simultaneously
if ( bedOutput != null ) bedWriter = new FileWriter(bedOutput);
@ -1152,8 +1150,9 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
GenotypesContext genotypes = GenotypesContext.create();
for ( String sample : normalSamples ) {
final GenotypeBuilder gb = new GenotypeBuilder(sample);
gb.attributes(call.makeStatsAttributes(null));
GenotypeBuilder gb = new GenotypeBuilder(sample);
gb=call.addStatsAttributes(gb);
gb.alleles(! discard_event
? alleles // we made a call - put actual het genotype here:
: homref_alleles); // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
@ -1200,8 +1199,11 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
if ( start == 0 )
return;
Map<String,Object> attrsNormal = nCall.makeStatsAttributes(null);
Map<String,Object> attrsTumor = tCall.makeStatsAttributes(null);
GenotypeBuilder nGB = new GenotypeBuilder();
GenotypeBuilder tGB = new GenotypeBuilder();
nCall.addStatsAttributes(nGB);
tCall.addStatsAttributes(tGB);
Map<String,Object> attrs = new HashMap();
@ -1242,11 +1244,11 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
GenotypesContext genotypes = GenotypesContext.create();
for ( String sample : normalSamples ) {
genotypes.add(GenotypeBuilder.create(sample, homRefN ? homRefAlleles : alleles, attrsNormal));
genotypes.add(nGB.name(sample).alleles(homRefN ? homRefAlleles : alleles).make());
}
for ( String sample : tumorSamples ) {
genotypes.add(GenotypeBuilder.create(sample, homRefT ? homRefAlleles : alleles, attrsTumor));
genotypes.add(tGB.name(sample).alleles(homRefT ? homRefAlleles : alleles).make());
}
Set<String> filters = null;
@ -1254,14 +1256,6 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
filters = new HashSet<String>();
filters.add("NoCall");
}
if ( nCall.getCoverage() < minNormalCoverage ) {
if ( filters == null ) filters = new HashSet<String>();
filters.add("NCov");
}
if ( tCall.getCoverage() < minCoverage ) {
if ( filters == null ) filters = new HashSet<String>();
filters.add("TCov");
}
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
.genotypes(genotypes).filters(filters).attributes(attrs).make();
@ -1844,6 +1838,38 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
VCFIndelAttributes.recordStrandCounts(strand_cons.first,strand_cons.second,strand_ref.first,strand_ref.second,attr);
return attr;
}
/**
* Adds alignment statistics directly into the genotype builder object.
*
* @param gb
* @return
*/
public GenotypeBuilder addStatsAttributes(GenotypeBuilder gb) {
if ( gb == null ) gb = new GenotypeBuilder();
gb = VCFIndelAttributes.recordDepth(getConsensusVariantCount(),getAllVariantCount(),getCoverage(),gb);
gb = VCFIndelAttributes.recordAvMM(getAvConsensusMismatches(),getAvRefMismatches(),gb);
gb = VCFIndelAttributes.recordAvMapQ(getAvConsensusMapq(),getAvRefMapq(),gb);
gb = VCFIndelAttributes.recordNQSMMRate(getNQSConsensusMMRate(),getNQSRefMMRate(),gb);
gb = VCFIndelAttributes.recordNQSAvQ(getNQSConsensusAvQual(),getNQSRefAvQual(),gb);
gb = VCFIndelAttributes.recordOffsetFromStart(from_start_median,from_start_mad,gb);
gb = VCFIndelAttributes.recordOffsetFromEnd(from_end_median,from_end_mad,gb);
PrimitivePair.Int strand_cons = getConsensusStrandCounts();
PrimitivePair.Int strand_ref = getRefStrandCounts();
gb = VCFIndelAttributes.recordStrandCounts(strand_cons.first,strand_cons.second,strand_ref.first,strand_ref.second,gb);
return gb;
}
}
interface IndelListener {
@ -2170,18 +2196,18 @@ class VCFIndelAttributes {
public static Set<? extends VCFHeaderLine> getAttributeHeaderLines() {
Set<VCFHeaderLine> lines = new HashSet<VCFHeaderLine>();
lines.add(new VCFFormatHeaderLine(ALLELIC_DEPTH_KEY, 2, VCFHeaderLineType.Integer, "# of reads supporting consensus indel/reference at the site"));
lines.add(new VCFFormatHeaderLine(ALLELIC_DEPTH_KEY, 2, VCFHeaderLineType.Integer, "# of reads supporting consensus reference/indel at the site"));
lines.add(new VCFFormatHeaderLine(DEPTH_TOTAL_KEY, 1, VCFHeaderLineType.Integer, "Total coverage at the site"));
lines.add(new VCFFormatHeaderLine(MAPQ_KEY, 2, VCFHeaderLineType.Float, "Average mapping qualities of consensus indel-supporting reads/reference-supporting reads"));
lines.add(new VCFFormatHeaderLine(MAPQ_KEY, 2, VCFHeaderLineType.Float, "Average mapping qualities of ref-/consensus indel-supporting reads"));
lines.add(new VCFFormatHeaderLine(MM_KEY, 2, VCFHeaderLineType.Float, "Average # of mismatches per consensus indel-supporting read/per reference-supporting read"));
lines.add(new VCFFormatHeaderLine(MM_KEY, 2, VCFHeaderLineType.Float, "Average # of mismatches per ref-/consensus indel-supporting read"));
lines.add(new VCFFormatHeaderLine(NQS_MMRATE_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: fraction of mismatching bases in consensus indel-supporting reads/in reference-supporting reads"));
lines.add(new VCFFormatHeaderLine(NQS_MMRATE_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: fraction of mismatching bases in ref/consensus indel-supporting reads"));
lines.add(new VCFFormatHeaderLine(NQS_AVQ_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: average quality of bases from consensus indel-supporting reads/from reference-supporting reads"));
lines.add(new VCFFormatHeaderLine(NQS_AVQ_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: average quality of bases in ref-/consensus indel-supporting reads"));
lines.add(new VCFFormatHeaderLine(STRAND_COUNT_KEY, 4, VCFHeaderLineType.Integer, "Strandness: counts of forward-/reverse-aligned indel-supporting reads / forward-/reverse-aligned reference supporting reads"));
lines.add(new VCFFormatHeaderLine(STRAND_COUNT_KEY, 4, VCFHeaderLineType.Integer, "Strandness: counts of forward-/reverse-aligned reference and indel-supporting reads (FwdRef,RevRef,FwdIndel,RevIndel)"));
lines.add(new VCFFormatHeaderLine(RSTART_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the starts of the reads"));
lines.add(new VCFFormatHeaderLine(REND_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the ends of the reads"));
@ -2194,39 +2220,72 @@ class VCFIndelAttributes {
return attrs;
}
public static GenotypeBuilder recordStrandCounts(int cnt_cons_fwd, int cnt_cons_rev, int cnt_ref_fwd, int cnt_ref_rev,
GenotypeBuilder gb) {
return gb.attribute(STRAND_COUNT_KEY, new Integer[] {cnt_ref_fwd, cnt_ref_rev,cnt_cons_fwd, cnt_cons_rev } );
}
public static Map<String,Object> recordDepth(int cnt_cons, int cnt_indel, int cnt_total, Map<String,Object> attrs) {
attrs.put(ALLELIC_DEPTH_KEY, new Integer[] {cnt_cons, cnt_indel} );
attrs.put(ALLELIC_DEPTH_KEY, new Integer[] {cnt_total-cnt_indel, cnt_cons} );
attrs.put(DEPTH_TOTAL_KEY, cnt_total);
return attrs;
}
public static GenotypeBuilder recordDepth(int cnt_cons, int cnt_indel, int cnt_total, GenotypeBuilder gb) {
return gb.AD(new int[] {cnt_total-cnt_indel,cnt_cons} ).DP(cnt_total);
}
public static Map<String,Object> recordAvMapQ(double cons, double ref, Map<String,Object> attrs) {
attrs.put(MAPQ_KEY, new Float[] {(float)cons, (float)ref} );
attrs.put(MAPQ_KEY, new Float[] {(float)ref, (float)cons} );
return attrs;
}
public static GenotypeBuilder recordAvMapQ(double cons, double ref, GenotypeBuilder gb) {
return gb.attribute(MAPQ_KEY,new float[] {(float)ref, (float)cons} );
}
public static Map<String,Object> recordAvMM(double cons, double ref, Map<String,Object> attrs) {
attrs.put(MM_KEY, new Float[] {(float)cons, (float)ref} );
attrs.put(MM_KEY, new Float[] {(float)ref, (float)cons} );
return attrs;
}
public static GenotypeBuilder recordAvMM(double cons, double ref, GenotypeBuilder gb) {
return gb.attribute(MM_KEY, new float[] {(float)ref, (float)cons} );
}
public static Map<String,Object> recordNQSMMRate(double cons, double ref, Map<String,Object> attrs) {
attrs.put(NQS_MMRATE_KEY, new Float[] {(float)cons, (float)ref} );
attrs.put(NQS_MMRATE_KEY, new Float[] {(float)ref, (float)cons} );
return attrs;
}
public static GenotypeBuilder recordNQSMMRate(double cons, double ref, GenotypeBuilder gb) {
return gb.attribute(NQS_MMRATE_KEY, new float[] {(float)ref, (float)cons} );
}
public static Map<String,Object> recordNQSAvQ(double cons, double ref, Map<String,Object> attrs) {
attrs.put(NQS_AVQ_KEY, new Float[] {(float)cons, (float)ref} );
attrs.put(NQS_AVQ_KEY, new float[] {(float)ref, (float)cons} );
return attrs;
}
public static GenotypeBuilder recordNQSAvQ(double cons, double ref, GenotypeBuilder gb) {
return gb.attribute(NQS_AVQ_KEY, new float[] {(float)ref, (float)cons} );
}
public static Map<String,Object> recordOffsetFromStart(int median, int mad, Map<String,Object> attrs) {
attrs.put(RSTART_OFFSET_KEY, new Integer[] {median, mad} );
return attrs;
}
public static GenotypeBuilder recordOffsetFromStart(int median, int mad, GenotypeBuilder gb) {
return gb.attribute(RSTART_OFFSET_KEY, new int[] {median, mad} );
}
public static Map<String,Object> recordOffsetFromEnd(int median, int mad, Map<String,Object> attrs) {
attrs.put(REND_OFFSET_KEY, new Integer[] {median, mad} );
return attrs;
}
public static GenotypeBuilder recordOffsetFromEnd(int median, int mad, GenotypeBuilder gb) {
return gb.attribute(REND_OFFSET_KEY, new int[] {median, mad} );
}
}

View File

@ -0,0 +1,173 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream;
import java.util.List;
/**
* Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of
* the reduced bam are above sufficient threshold)
*
* <h2>Input</h2>
* <p>
* The original and reduced BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -I:original original.bam \
* -I:reduced reduced.bam \
* -R ref.fasta \
* -T AssessReducedQuals \
* -o output.intervals
* </pre>
*
* @author ami
*/
public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
private static final String reduced = "reduced";
private static final String original = "original";
private static final int originalQualsIndex = 0;
private static final int reducedQualsIndex = 1;
@Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
public int sufficientQualSum = 600;
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
public int qual_epsilon = 0;
@Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") // TODO -- best to make this optional
public int debugLevel = 0; // TODO -- best to make this an enum or boolean
@Output
protected PrintStream out;
public void initialize() {
if (debugLevel != 0)
out.println(" Debug mode" +
"Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
"Debug:\tqual_epsilon: "+qual_epsilon);
}
@Override
public boolean includeReadsWithDeletionAtLoci() { return true; }
@Override
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null )
return null;
boolean reportLocus;
final int[] quals = getPileupQuals(context.getBasePileup());
if (debugLevel != 0)
out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
if(debugLevel != 0 && reportLocus)
out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
return reportLocus ? ref.getLocus() : null;
}
private final int[] getPileupQuals(final ReadBackedPileup readPileup) {
final int[] quals = new int[2];
String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
"Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
for( PileupElement p : readPileup ){
final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
if ( isGoodRead(p,tags) ){
final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
final int tagIndex = getTagIndex(tags);
quals[tagIndex] += tempQual;
if(debugLevel == 2)
printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
}
}
if(debugLevel == 2){
out.println(printPileup[originalQualsIndex]);
out.println(printPileup[reducedQualsIndex]);
}
return quals;
}
// TODO -- arguments/variables should be final, not method declaration
private final boolean isGoodRead(PileupElement p, List<String> tags){
// TODO -- this isn't quite right. You don't need the tags here. Instead, you want to check whether the read itself (which
// TODO -- you can get from the PileupElement) is a reduced read (not all reads from the reduced bam are reduced) and only
// TODO -- for them do you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff).
return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
}
private final int getTagIndex(List<String> tags){
return tags.contains(reduced) ? 1 : 0;
}
@Override
public void onTraversalDone(GenomeLoc sum) {
if ( sum != null )
out.println(sum);
}
@Override
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
if ( lhs == null )
return rhs;
if ( rhs == null )
return lhs;
// if contiguous, just merge them
if ( lhs.contiguousP(rhs) )
return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
// otherwise, print the lhs and start over with the rhs
out.println(lhs);
return rhs;
}
@Override
public GenomeLoc reduceInit() {
return null;
}
@Override
public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
if ( value == null )
return sum;
if ( sum == null )
return value;
// if contiguous, just merge them
if ( sum.contiguousP(value) )
return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
// otherwise, print the sum and start over with the value
out.println(sum);
return value;
}
}

View File

@ -245,24 +245,21 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
@Argument(fullName="condition_on_depth", shortName="depth", doc="Condition validation on a minimum depth of coverage by the reads", required=false)
private int minDepth = -1;
/**
* If your VCF or BAM file has more than one sample and you only want to validate one, use this parameter to choose it.
*/
@Hidden
@Argument(fullName ="sample", shortName ="sn", doc="Name of the sample to validate (in case your VCF/BAM has more than one sample)", required=false)
private String sample = "";
/**
/**
* Print out discordance sites to standard out.
*/
@Hidden
@Argument(fullName ="print_interesting_sites", shortName ="print_interesting", doc="Print out interesting sites to standard out", required=false)
private boolean printInterestingSites;
private boolean printInterestingSites = false;
private UnifiedGenotyperEngine snpEngine;
private UnifiedGenotyperEngine indelEngine;
private Set<String> samples;
private enum GVstatus {
T, F, NONE
}
public static class CountedData {
private long nAltCalledAlt = 0L;
private long nAltCalledRef = 0L;
@ -336,8 +333,9 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
snpEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
// Adding the INDEL calling arguments for UG
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
indelEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
UnifiedArgumentCollection uac_indel = new UnifiedArgumentCollection(uac);
uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
indelEngine = new UnifiedGenotyperEngine(getToolkit(), uac_indel);
// make sure we have callConf set to the threshold set by the UAC so we can use it later.
callConf = uac.STANDARD_CONFIDENCE_FOR_CALLING;
@ -368,9 +366,10 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
// Do not operate on variants that are not covered to the optional minimum depth
if (!context.hasReads() || (minDepth > 0 && context.getBasePileup().getBases().length < minDepth)) {
counter.nUncovered = 1L;
if (vcComp.getAttribute("GV").equals("T"))
final GVstatus status = getGVstatus(vcComp);
if ( status == GVstatus.T )
counter.nAltNotCalled = 1L;
else if (vcComp.getAttribute("GV").equals("F"))
else if ( status == GVstatus.F )
counter.nRefNotCalled = 1L;
else
counter.nNoStatusNotCalled = 1L;
@ -427,10 +426,11 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
// if (!vcComp.hasExtendedAttribute("GV"))
// throw new UserException.BadInput("Variant has no GV annotation in the INFO field. " + vcComp.getChr() + ":" + vcComp.getStart());
final GVstatus status = getGVstatus(vcComp);
if (call.isCalledAlt(callConf)) {
if (vcComp.getAttribute("GV").equals("T"))
if ( status == GVstatus.T )
counter.nAltCalledAlt = 1L;
else if (vcComp.getAttribute("GV").equals("F")) {
else if ( status == GVstatus.F ) {
counter.nRefCalledAlt = 1L;
if ( printInterestingSites )
System.out.println("Truth=REF Call=ALT at " + call.getChr() + ":" + call.getStart());
@ -439,12 +439,12 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
counter.nNoStatusCalledAlt = 1L;
}
else if (call.isCalledRef(callConf)) {
if (vcComp.getAttribute("GV").equals("T")) {
if ( status == GVstatus.T ) {
counter.nAltCalledRef = 1L;
if ( printInterestingSites )
System.out.println("Truth=ALT Call=REF at " + call.getChr() + ":" + call.getStart());
}
else if (vcComp.getAttribute("GV").equals("F"))
else if ( status == GVstatus.F )
counter.nRefCalledRef = 1L;
else
@ -452,9 +452,9 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
}
else {
counter.nNotConfidentCalls = 1L;
if (vcComp.getAttribute("GV").equals("T"))
if ( status == GVstatus.T )
counter.nAltNotCalled = 1L;
else if (vcComp.getAttribute("GV").equals("F"))
else if ( status == GVstatus.F )
counter.nRefNotCalled = 1L;
else
counter.nNoStatusNotCalled = 1L;
@ -475,6 +475,10 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
return counter;
}
private GVstatus getGVstatus(final VariantContext vc) {
return ( !vc.hasAttribute("GV") ) ? GVstatus.NONE : (vc.getAttribute("GV").equals("T") ? GVstatus.T : GVstatus.F);
}
//---------------------------------------------------------------------------------------------------------------
//
// reduce

View File

@ -459,7 +459,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH;
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
UAC.NO_SLOD = true;
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null));
}

View File

@ -346,7 +346,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
@Override
protected String getFreezeFields() {
return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%n");
return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%nif (num_cpu_threads_per_data_thread.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1))%n");
}
}

View File

@ -4,12 +4,21 @@ package org.broadinstitute.sting.utils;
* Simple utility class that makes it convenient to print unit adjusted times
*/
public class AutoFormattingTime {
double timeInSeconds; // in Seconds
int precision; // for format
private final int width; // for format
private final int precision; // for format
public AutoFormattingTime(double timeInSeconds, int precision) {
double timeInSeconds; // in Seconds
private final String formatString;
public AutoFormattingTime(double timeInSeconds, final int width, int precision) {
this.width = width;
this.timeInSeconds = timeInSeconds;
this.precision = precision;
this.formatString = "%" + width + "." + precision + "f %s";
}
public AutoFormattingTime(double timeInSeconds, int precision) {
this(timeInSeconds, 6, precision);
}
public AutoFormattingTime(double timeInSeconds) {
@ -20,6 +29,20 @@ public class AutoFormattingTime {
return timeInSeconds;
}
/**
* @return the precision (a la format's %WIDTH.PERCISIONf)
*/
public int getWidth() {
return width;
}
/**
* @return the precision (a la format's %WIDTH.PERCISIONf)
*/
public int getPrecision() {
return precision;
}
/**
* Instead of 10000 s, returns 2.8 hours
* @return
@ -48,6 +71,6 @@ public class AutoFormattingTime {
}
}
return String.format("%6."+precision+"f %s", unitTime, unit);
return String.format(formatString, unitTime, unit);
}
}

View File

@ -159,6 +159,7 @@ public class VCFHeader {
*/
public void addMetaDataLine(VCFHeaderLine headerLine) {
mMetaData.add(headerLine);
loadMetaDataMaps();
}
/**
@ -236,7 +237,6 @@ public class VCFHeader {
+ VCFConstants.GENOTYPE_PL_KEY + " field. As the GATK now only manages PL fields internally"
+ " automatically adding a corresponding PL field to your VCF header");
addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
loadMetaDataMaps();
}
}

View File

@ -99,9 +99,7 @@ public class LoggingNestedIntegerArray<T> extends NestedIntegerArray<T> {
}
@Override
public void put( final T value, final int... keys ) {
super.put(value, keys);
public boolean put( final T value, final int... keys ) {
StringBuilder logEntry = new StringBuilder();
logEntry.append(logEntryLabel);
@ -116,5 +114,7 @@ public class LoggingNestedIntegerArray<T> extends NestedIntegerArray<T> {
// PrintStream methods all use synchronized blocks internally, so our logging is thread-safe
log.println(logEntry.toString());
return super.put(value, keys);
}
}

View File

@ -25,9 +25,11 @@
package org.broadinstitute.sting.utils.collections;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
@ -38,18 +40,51 @@ import java.util.List;
public class NestedIntegerArray<T> {
private static Logger logger = Logger.getLogger(NestedIntegerArray.class);
protected final Object[] data;
protected final int numDimensions;
protected final int[] dimensions;
// Preallocate the first two dimensions to limit contention during tree traversals in put()
private static final int NUM_DIMENSIONS_TO_PREALLOCATE = 2;
public NestedIntegerArray(final int... dimensions) {
numDimensions = dimensions.length;
if ( numDimensions == 0 )
throw new ReviewedStingException("There must be at least one dimension to an NestedIntegerArray");
this.dimensions = dimensions.clone();
int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE);
logger.info(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions)));
logger.info(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate));
data = new Object[dimensions[0]];
preallocateArray(data, 0, dimensionsToPreallocate);
logger.info(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate));
}
/**
* Recursively allocate the first dimensionsToPreallocate dimensions of the tree
*
* Pre-allocating the first few dimensions helps limit contention during tree traversals in put()
*
* @param subarray current node in the tree
* @param dimension current level in the tree
* @param dimensionsToPreallocate preallocate only this many dimensions (starting from the first)
*/
private void preallocateArray( Object[] subarray, int dimension, int dimensionsToPreallocate ) {
if ( dimension >= dimensionsToPreallocate - 1 ) {
return;
}
for ( int i = 0; i < subarray.length; i++ ) {
subarray[i] = new Object[dimensions[dimension + 1]];
preallocateArray((Object[])subarray[i], dimension + 1, dimensionsToPreallocate);
}
}
public T get(final int... keys) {
@ -59,14 +94,30 @@ public class NestedIntegerArray<T> {
for( int i = 0; i < numNestedDimensions; i++ ) {
if ( keys[i] >= dimensions[i] )
return null;
myData = (Object[])myData[keys[i]];
if ( myData == null )
return null;
}
return (T)myData[keys[numNestedDimensions]];
}
public synchronized void put(final T value, final int... keys) { // WARNING! value comes before the keys!
/**
* Insert a value at the position specified by the given keys.
*
* This method is thread-safe, however the caller MUST check the
* return value to see if the put succeeded. This method RETURNS FALSE if
* the value could not be inserted because there already was a value present
* at the specified location. In this case the caller should do a get() to get
* the already-existing value and (potentially) update it.
*
* @param value value to insert
* @param keys keys specifying the location of the value in the tree
* @return true if the value was inserted, false if it could not be inserted because there was already
* a value at the specified position
*/
public boolean put(final T value, final int... keys) { // WARNING! value comes before the keys!
if ( keys.length != numDimensions )
throw new ReviewedStingException("Exactly " + numDimensions + " keys should be passed to this NestedIntegerArray but " + keys.length + " were provided");
@ -75,15 +126,35 @@ public class NestedIntegerArray<T> {
for ( int i = 0; i < numNestedDimensions; i++ ) {
if ( keys[i] >= dimensions[i] )
throw new ReviewedStingException("Key " + keys[i] + " is too large for dimension " + i + " (max is " + (dimensions[i]-1) + ")");
Object[] temp = (Object[])myData[keys[i]];
if ( temp == null ) {
temp = new Object[dimensions[i+1]];
myData[keys[i]] = temp;
// If we're at or beyond the last dimension that was pre-allocated, we need to do a synchronized
// check to see if the next branch exists, and if it doesn't, create it
if ( i >= NUM_DIMENSIONS_TO_PREALLOCATE - 1 ) {
synchronized ( myData ) {
if ( myData[keys[i]] == null ) {
myData[keys[i]] = new Object[dimensions[i + 1]];
}
}
}
myData = temp;
myData = (Object[])myData[keys[i]];
}
myData[keys[numNestedDimensions]] = value;
synchronized ( myData ) { // lock the bottom row while we examine and (potentially) update it
// Insert the new value only if there still isn't any existing value in this position
if ( myData[keys[numNestedDimensions]] == null ) {
myData[keys[numNestedDimensions]] = value;
}
else {
// Already have a value for this leaf (perhaps another thread came along and inserted one
// while we traversed the tree), so return false to notify the caller that we didn't put
// the item
return false;
}
}
return true;
}
public List<T> getAllValues() {

View File

@ -22,26 +22,29 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
package org.broadinstitute.sting.utils.genotyper;
//import org.broadinstitute.sting.gatk.walkers.Requires;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
import java.util.*;
public class PerReadAlleleLikelihoodMap {
public abstract class PerReadAlleleLikelihoodMap {
public static final double INDEL_LIKELIHOOD_THRESH = 0.1;
private List<Allele> alleles;
private Map<GATKSAMRecord,Map<Allele,Double>> likelihoodReadMap;
public PerReadAlleleLikelihoodMap() {
likelihoodReadMap = new LinkedHashMap<GATKSAMRecord,Map<Allele,Double>>();
alleles = new ArrayList<Allele>();
}
protected List<Allele> alleles;
protected Map<GATKSAMRecord,Map<Allele,Double>> likelihoodReadMap;
public abstract void performPerAlleleDownsampling(final double downsamplingFraction, final PrintStream log);
public abstract ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log);
public void add(GATKSAMRecord read, Allele a, Double likelihood) {
Map<Allele,Double> likelihoodMap;
@ -95,16 +98,6 @@ public class PerReadAlleleLikelihoodMap {
public int getNumberOfStoredElements() {
return likelihoodReadMap.size();
}
/**
* Returns list of reads greedily associated with a particular allele.
* Needs to loop for each read, and assign to each allele
* @param a Desired allele
* @return
*/
@Requires("a!=null")
public List<GATKSAMRecord> getReadsAssociatedWithAllele(Allele a) {
return null;
}
public Map<Allele,Double> getLikelihoodsAssociatedWithPileupElement(PileupElement p) {
if (!likelihoodReadMap.containsKey(p.getRead()))
@ -129,4 +122,16 @@ public class PerReadAlleleLikelihoodMap {
}
return (maxLike - prevMaxLike > INDEL_LIKELIHOOD_THRESH ? mostLikelyAllele : Allele.NO_CALL );
}
}
public static PerReadAlleleLikelihoodMap getBestAvailablePerReadAlleleLikelihoodMap() {
final Class PerReadAlleleLikelihoodMapClass = GATKLiteUtils.getProtectedClassIfAvailable(PerReadAlleleLikelihoodMap.class);
try {
Constructor constructor = PerReadAlleleLikelihoodMapClass.getDeclaredConstructor((Class[])null);
constructor.setAccessible(true);
return (PerReadAlleleLikelihoodMap)constructor.newInstance();
}
catch (Exception e) {
throw new ReviewedStingException("Unable to create RecalibrationEngine class instance " + PerReadAlleleLikelihoodMapClass.getSimpleName());
}
}
}

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.genotyper;
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.PrintStream;
import java.util.*;
public class StandardPerReadAlleleLikelihoodMap extends PerReadAlleleLikelihoodMap implements PublicPackageSource {
public StandardPerReadAlleleLikelihoodMap() {
likelihoodReadMap = new LinkedHashMap<GATKSAMRecord,Map<Allele,Double>>();
alleles = new ArrayList<Allele>();
}
// not implemented in the standard version
public void performPerAlleleDownsampling(final double downsamplingFraction, final PrintStream log) {}
public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) { return pileup; }
}

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.utils.progressmeter;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.*;
@ -200,6 +201,14 @@ public class ProgressMeter {
"Location", processingUnitName, processingUnitName));
}
/**
* @return the current runtime in nanoseconds
*/
@Ensures("result >= 0")
public long getRuntimeInNanoseconds() {
return timer.getElapsedTimeNano();
}
/**
* Utility routine that prints out process information (including timing) every N records or
* every M seconds, for N and M set in global variables.

View File

@ -61,17 +61,6 @@ public class BaseRecalibration {
// qualityScoreByFullCovariateKey[i] = new NestedHashMap();
// }
/**
* Thread local cache to allow multi-threaded use of this class
*/
private ThreadLocal<ReadCovariates> readCovariatesCache;
{
readCovariatesCache = new ThreadLocal<ReadCovariates> () {
@Override protected ReadCovariates initialValue() {
return new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
}
};
}
/**
* Constructor using a GATK Report file
@ -113,13 +102,7 @@ public class BaseRecalibration {
}
}
// Compute all covariates for the read
// TODO -- the need to clear here suggests there's an error in the indexing / assumption code
// TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads?
// TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here
// TODO -- needs to be fixed.
final ReadCovariates readCovariates = readCovariatesCache.get().clear();
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, requestedCovariates);
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {

View File

@ -66,7 +66,9 @@ public class ReadGroupCovariate implements RequiredCovariate {
}
@Override
public String formatKey(final int key) {
public synchronized String formatKey(final int key) {
// This method is synchronized so that we don't attempt to do a get()
// from the reverse lookup table while that table is being updated
return readGroupReverseLookupTable.get(key);
}
@ -76,17 +78,32 @@ public class ReadGroupCovariate implements RequiredCovariate {
}
private int keyForReadGroup(final String readGroupId) {
if (!readGroupLookupTable.containsKey(readGroupId)) {
readGroupLookupTable.put(readGroupId, nextId);
readGroupReverseLookupTable.put(nextId, readGroupId);
nextId++;
// Rather than synchronize this entire method (which would be VERY expensive for walkers like the BQSR),
// synchronize only the table updates.
// Before entering the synchronized block, check to see if this read group is not in our tables.
// If it's not, either we will have to insert it, OR another thread will insert it first.
// This preliminary check avoids doing any synchronization most of the time.
if ( ! readGroupLookupTable.containsKey(readGroupId) ) {
synchronized ( this ) {
// Now we need to make sure the key is STILL not there, since another thread may have come along
// and inserted it while we were waiting to enter this synchronized block!
if ( ! readGroupLookupTable.containsKey(readGroupId) ) {
readGroupLookupTable.put(readGroupId, nextId);
readGroupReverseLookupTable.put(nextId, readGroupId);
nextId++;
}
}
}
return readGroupLookupTable.get(readGroupId);
}
@Override
public int maximumKeyValue() {
public synchronized int maximumKeyValue() {
// Synchronized so that we don't query table size while the tables are being updated
return readGroupLookupTable.size() - 1;
}

View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.threading;
import java.lang.reflect.Array;
/**
* ThreadLocal implementation for arrays
*
* Example usage:
*
* private ThreadLocal<byte[]> threadLocalByteArray = new ThreadLocalArray<byte[]>(length, byte.class);
* ....
* byte[] byteArray = threadLocalByteArray.get();
*
* @param <T> the type of the array itself (eg., int[], double[], etc.)
*
* @author David Roazen
*/
public class ThreadLocalArray<T> extends ThreadLocal<T> {
private int arraySize;
private Class arrayElementType;
/**
* Create a new ThreadLocalArray
*
* @param arraySize desired length of the array
* @param arrayElementType type of the elements within the array (eg., Byte.class, Integer.class, etc.)
*/
public ThreadLocalArray( int arraySize, Class arrayElementType ) {
super();
this.arraySize = arraySize;
this.arrayElementType = arrayElementType;
}
@Override
@SuppressWarnings("unchecked")
protected T initialValue() {
return (T)Array.newInstance(arrayElementType, arraySize);
}
}

View File

@ -193,6 +193,8 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
infoMap.put("isHet", g.isHet() ? "1" : "0");
infoMap.put("isHomVar", g.isHomVar() ? "1" : "0");
infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getGQ());
if ( g.hasDP() )
infoMap.put(VCFConstants.DEPTH_KEY, g.getDP());
for ( Map.Entry<String, Object> e : g.getExtendedAttributes().entrySet() ) {
if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) )
infoMap.put(e.getKey(), e.getValue());

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.concurrent.TimeUnit;
/**
*
*/
public class MaxRuntimeIntegrationTest extends WalkerTest {
private static final long STARTUP_TIME = TimeUnit.NANOSECONDS.convert(20, TimeUnit.SECONDS);
private class MaxRuntimeTestProvider extends TestDataProvider {
final long maxRuntime;
final TimeUnit unit;
public MaxRuntimeTestProvider(final long maxRuntime, final TimeUnit unit) {
super(MaxRuntimeTestProvider.class);
this.maxRuntime = maxRuntime;
this.unit = unit;
setName(String.format("Max runtime test : %d of %s", maxRuntime, unit));
}
public long expectedMaxRuntimeNano() {
return TimeUnit.NANOSECONDS.convert(maxRuntime, unit) + STARTUP_TIME;
}
}
@DataProvider(name = "MaxRuntimeProvider")
public Object[][] makeMaxRuntimeProvider() {
for ( final TimeUnit requestedUnits : Arrays.asList(TimeUnit.NANOSECONDS, TimeUnit.MILLISECONDS, TimeUnit.SECONDS, TimeUnit.MINUTES) )
new MaxRuntimeTestProvider(requestedUnits.convert(30, TimeUnit.SECONDS), requestedUnits);
return MaxRuntimeTestProvider.getTests(MaxRuntimeTestProvider.class);
}
//
// Loop over errors to throw, make sure they are the errors we get back from the engine, regardless of NT type
//
@Test(enabled = true, dataProvider = "MaxRuntimeProvider", timeOut = 60 * 1000)
public void testMaxRuntime(final MaxRuntimeTestProvider cfg) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + hg18Reference
+ " -I " + validationDataLocation + "NA12878.WEx.downsampled20x.bam -o /dev/null"
+ " -maxRuntime " + cfg.maxRuntime + " -maxRuntimeUnits " + cfg.unit, 0,
Collections.<String>emptyList());
final SimpleTimer timer = new SimpleTimer().start();
executeTest("Max runtime " + cfg, spec);
final long actualRuntimeNano = timer.getElapsedTimeNano();
Assert.assertTrue(actualRuntimeNano < cfg.expectedMaxRuntimeNano(),
"Actual runtime " + TimeUnit.SECONDS.convert(actualRuntimeNano, TimeUnit.NANOSECONDS)
+ " exceeded max. tolerated runtime " + TimeUnit.SECONDS.convert(cfg.expectedMaxRuntimeNano(), TimeUnit.NANOSECONDS)
+ " given requested runtime " + cfg.maxRuntime + " " + cfg.unit);
}
}

View File

@ -108,4 +108,22 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
Arrays.asList("8ed32a2272bab8043a255362335395ef"));
executeTest("testUnfilteredBecomesFilteredAndPass", spec);
}
@Test
public void testFilteringDPfromINFO() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --filterExpression 'DP < 8' --filterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1,
Arrays.asList("a01f7cce53ea556c9741aa60b6124c41"));
executeTest("testFilteringDPfromINFO", spec);
}
@Test
public void testFilteringDPfromFORMAT() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --genotypeFilterExpression 'DP < 8' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1,
Arrays.asList("e10485c7c33d9211d0c1294fd7858476"));
executeTest("testFilteringDPfromFORMAT", spec);
}
}

View File

@ -16,9 +16,9 @@ import java.util.List;
public class UnifiedGenotyperIntegrationTest extends WalkerTest {
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam";
// --------------------------------------------------------------------------------------------------------------
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("847605f4efafef89529fe0e496315edd"));
Arrays.asList("cdec335abc9ad8e59335e39a73e0e95a"));
executeTest("test MultiSample Pilot1", spec);
}
@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("bc15123620e1134f799005d71d1180fe"));
Arrays.asList("efddb5e258f97fd4f6661cff9eaa57de"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
}
@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("1ba7afccc8552f20d72d0b62237558e3"));
Arrays.asList("24532eb381724cd74e99370da28d49ed"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -54,22 +54,22 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("afb8768f31ab57eb43f75c1115eadc99"));
Arrays.asList("062a946160eec1d0fc135d58ca654ff4"));
executeTest("test SingleSample Pilot2", spec);
}
@Test
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("543f68e42034bf44cfb24da8c9204320"));
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("94dc17d76d841f1d3a36160767ffa034"));
executeTest("test Multiple SNP alleles", spec);
}
@Test
public void testBadRead() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
Arrays.asList("d915535c1458733f09f82670092fcab6"));
executeTest("test bad read", spec);
}
@ -77,16 +77,16 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("5ce03dd9ca2d9324c1d4a9d64389beb5"));
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("9106d01ca0d0a8fedd068e72d509f380"));
executeTest("test reverse trim", spec);
}
@Test
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("3c006b06b17bbe8e787d64eff6a63a19"));
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("d847acf841ba8ba653f996ce4869f439"));
executeTest("test mismatched PLs", spec);
}
@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "fd236bd635d514e4214d364f45ec4d10";
private final static String COMPRESSED_OUTPUT_MD5 = "6792419c482e767a3deb28913ed2b1ad";
@Test
public void testCompressedOutput() {
@ -120,21 +120,21 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
String md5 = "d408b4661b820ed86272415b8ea08780";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
Arrays.asList(md5));
executeTest("test parallelization (single thread)", spec1);
GenomeAnalysisEngine.resetRandomGenerator();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1,
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1,
Arrays.asList(md5));
executeTest("test parallelization (2 threads)", spec2);
GenomeAnalysisEngine.resetRandomGenerator();
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1,
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1,
Arrays.asList(md5));
executeTest("test parallelization (4 threads)", spec3);
}
@ -149,15 +149,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinBaseQualityScore() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
Arrays.asList("839ecd30d354a36b5dfa2b5e99859765"));
Arrays.asList("56157d930da6ccd224bce1ca93f11e41"));
executeTest("test min_base_quality_score 26", spec);
}
@Test
public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("c7429e670ba477bf9a6bbee2fb41c5a9"));
"-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("6ccb9bd88934e4272d0ce362dd35e603"));
executeTest("test SLOD", spec);
}
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNDA() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("abd8e33e649cc11b55e200d3940cc7e2"));
Arrays.asList("480437dd6e2760f4ab3194431519f331"));
executeTest("test NDA", spec);
}
@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testCompTrack() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("8a9b424e00cdbe6b5e73d517335b2186"));
Arrays.asList("22c039412fd387dde6125b07c9a74a25"));
executeTest("test using comp track", spec);
}
@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameterSitesOnly() {
testOutputParameters("-sites_only", "97ba874eafc9884a4de027a84c036311");
testOutputParameters("-sites_only", "40aeb4c9e31fe7046b72afc58e7599cb");
}
@Test
public void testOutputParameterAllConfident() {
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "f9ea04d96eeef29e71d37e60518c2579");
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "c706ca93b25ff83613cb4e95dcac567c");
}
@Test
public void testOutputParameterAllSites() {
testOutputParameters("--output_mode EMIT_ALL_SITES", "41c046d38ea328421df924e37e017645");
testOutputParameters("--output_mode EMIT_ALL_SITES", "8a263fd0a94463ce1de9990f2b8ec841");
}
private void testOutputParameters(final String args, final String md5) {
@ -211,18 +211,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("9addd225a985178339a0c49dc5fdc220"));
Arrays.asList("df524e98903d96ab9353bee7c16a69de"));
executeTest("test confidence 1", spec1);
}
@Test
public void testConfidence2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("9addd225a985178339a0c49dc5fdc220"));
executeTest("test confidence 2", spec2);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing heterozygosity
@ -230,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testHeterozyosity1() {
testHeterozosity( 0.01, "986923de51c71635d47e3d06fe3794a1" );
testHeterozosity( 0.01, "8e61498ca03a8d805372a64c466b3b42" );
}
@Test
public void testHeterozyosity2() {
testHeterozosity( 1.0 / 1850, "fb12b1553f813004a394a391a8540873" );
testHeterozosity( 1.0 / 1850, "668d06b5173cf3b97d052726988e1d7b" );
}
private void testHeterozosity(final double arg, final String md5) {
@ -259,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("04a87b87ee4323eba853c78f25551d1a"));
Arrays.asList("908eb5e21fa39e7fb377cf4a9c4c7835"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -278,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("98058fc913b61c22d44875da1f5ea89c"));
Arrays.asList("c814558bb0ed2e19b12e1a2bf4465d52"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -297,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("650c53774afacfc07a595675e8cdde17"));
Arrays.asList("3593495aab5f6204c65de0b073a6ff65"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -312,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("6a0c2a3a7bcc56ad01428c71408055aa"));
Arrays.asList("8b486a098029d5a106b0a37eff541c15"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@ -325,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("5f2721c3323de5390d2d47446139f32b"));
Arrays.asList("18efedc50cae2aacaba372265e38310b"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -335,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("7e3f67bf371112be5dbadb4fe6faa52a"));
Arrays.asList("3ff8c7c80a518aa3eb8671a21479de5f"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@ -345,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("bc31c4977cb7e563ddf9c8dea27f3f4f"));
Arrays.asList("578c0540f4f2052a634a829bcb9cc27d"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@ -353,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("a4761d7f25e7a62f34494801c98a0da7"));
Arrays.asList("f7d0d0aee603df25c1f0525bb8df189e"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("c526c234947482d1cd2ffc5102083a08"));
Arrays.asList("fc91d457a16b4ca994959c2b5f3f0352"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@ -369,7 +361,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
Arrays.asList("d76eacc4021b78ccc0a9026162e814a7"));
Arrays.asList("9a7cd58b9e3d5b72608c0d529321deba"));
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
}
@ -415,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("ba4fafec383fb988f20c8cf53dd3e9a0"));
Arrays.asList("857b8e5df444463ac27f665c4f67fbe2"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -423,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("4c57a88de275105156aaafc6f9041365"));
Arrays.asList("81d4c7d9010fd6733b2997bc378e7471"));
executeTest("test minIndelFraction 0.25", spec);
}
@ -444,8 +436,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testNsInCigar() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
Arrays.asList("e8ebfaac0804b782f22ab8ea35152735"));
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
Arrays.asList("bd7984a374f0ae5d277bd5fc5065f64f"));
executeTest("test calling on reads with Ns in CIGAR", spec);
}
@ -458,26 +450,42 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("da9c05f87bd6415e97f90c49cf68ed19"));
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("1b711c0966a2f76eb21801e73b76b758"));
executeTest("test calling on a ReducedRead BAM", spec);
}
@Test
public void testReducedBamSNPs() {
testReducedCalling("SNP", "1d4a826b144723ff0766c36aa0239287");
testReducedCalling("SNP", "e7fc11baf208a1bca7b462d3148c936e");
}
@Test
public void testReducedBamINDELs() {
testReducedCalling("INDEL", "68ef51d5c98480e0c0192e0eecb95bca");
testReducedCalling("INDEL", "132a4e0ccf9230b5bb4b56c649e2bdd5");
}
private void testReducedCalling(final String model, final String md5) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1,
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1,
Arrays.asList(md5));
executeTest("test calling on a ReducedRead BAM with " + model, spec);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing contamination down-sampling
//
// --------------------------------------------------------------------------------------------------------------
@Test
public void testContaminationDownsampling() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
Arrays.asList("27dd04159e06d9524fb8a4eef41f96ae"));
executeTest("test contamination_percentage_to_filter 0.20", spec);
}
}

View File

@ -32,11 +32,12 @@ public class NanoSchedulerIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T UnifiedGenotyper -R " + b37KGReference,
"-nosl --no_cmdline_in_header -G",
"--no_cmdline_in_header -G",
//"--dbsnp " + b37dbSNP132,
"-I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam",
"-L 20:10,000,000-10,100,000",
"-glm " + glm,
"--contamination_fraction_to_filter 0.0",
"-nt " + nt,
"-nct " + nct,
"-o %s"

View File

@ -23,6 +23,7 @@ import java.util.List;
* To change this template use File | Settings | File Templates.
*/
public class NanoSchedulerUnitTest extends BaseTest {
private final static boolean DEBUG = false;
private final static boolean debug = false;
public static final int NANO_SCHEDULE_MAX_RUNTIME = 30000;
@ -30,19 +31,22 @@ public class NanoSchedulerUnitTest extends BaseTest {
@Override public Integer apply(Integer input) { return input * 2; }
}
private static void maybeDelayMe(final int input) {
try {
if ( input % 7 == 0 ) {
final int milliToSleep = (input % 10);
//System.out.printf("Sleeping %d millseconds%n", milliToSleep);
Thread.sleep(milliToSleep);
}
} catch ( InterruptedException ex ) {
throw new RuntimeException(ex);
}
}
private static class Map2xWithDelays extends Map2x {
@Override public Integer apply(Integer input) {
try {
if ( input % 7 == 0 ) {
final int milliToSleep = (input % 10);
//System.out.printf("Sleeping %d millseconds%n", milliToSleep);
Thread.sleep(milliToSleep);
}
return input * 2;
} catch ( InterruptedException ex ) {
throw new RuntimeException(ex);
}
maybeDelayMe(input);
return input * 2;
}
}
@ -117,10 +121,12 @@ public class NanoSchedulerUnitTest extends BaseTest {
}
static NanoSchedulerBasicTest exampleTest = null;
static NanoSchedulerBasicTest exampleTestWithDelays = null;
@BeforeSuite
public void setUp() throws Exception {
exampleTest = new NanoSchedulerBasicTest(10, 2, 1, 10, false);
exampleTestWithDelays = new NanoSchedulerBasicTest(10, 2, 1, 10, true);
}
@DataProvider(name = "NanoSchedulerBasicTest")
@ -151,14 +157,14 @@ public class NanoSchedulerUnitTest extends BaseTest {
return NanoSchedulerBasicTest.getTests(NanoSchedulerBasicTest.class);
}
@Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", timeOut = NANO_SCHEDULE_MAX_RUNTIME)
@Test(enabled = true && ! DEBUG, dataProvider = "NanoSchedulerBasicTest", timeOut = NANO_SCHEDULE_MAX_RUNTIME)
public void testSingleThreadedNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException {
logger.warn("Running " + test);
if ( test.nThreads == 1 )
testNanoScheduler(test);
}
@Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", timeOut = NANO_SCHEDULE_MAX_RUNTIME, dependsOnMethods = "testSingleThreadedNanoScheduler")
@Test(enabled = true && ! DEBUG, dataProvider = "NanoSchedulerBasicTest", timeOut = NANO_SCHEDULE_MAX_RUNTIME, dependsOnMethods = "testSingleThreadedNanoScheduler")
public void testMultiThreadedNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException {
logger.warn("Running " + test);
if ( test.nThreads >= 1 )
@ -195,7 +201,7 @@ public class NanoSchedulerUnitTest extends BaseTest {
}
}
@Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", dependsOnMethods = "testMultiThreadedNanoScheduler", timeOut = NANO_SCHEDULE_MAX_RUNTIME)
@Test(enabled = true && ! DEBUG, dataProvider = "NanoSchedulerBasicTest", dependsOnMethods = "testMultiThreadedNanoScheduler", timeOut = NANO_SCHEDULE_MAX_RUNTIME)
public void testNanoSchedulerInLoop(final NanoSchedulerBasicTest test) throws InterruptedException {
if ( test.bufferSize > 1) {
logger.warn("Running " + test);
@ -213,7 +219,7 @@ public class NanoSchedulerUnitTest extends BaseTest {
}
}
@Test(timeOut = NANO_SCHEDULE_MAX_RUNTIME)
@Test(enabled = true && ! DEBUG, timeOut = NANO_SCHEDULE_MAX_RUNTIME)
public void testShutdown() throws InterruptedException {
final NanoScheduler<Integer, Integer, Integer> nanoScheduler = new NanoScheduler<Integer, Integer, Integer>(1, 2);
Assert.assertFalse(nanoScheduler.isShutdown(), "scheduler should be alive");
@ -221,38 +227,78 @@ public class NanoSchedulerUnitTest extends BaseTest {
Assert.assertTrue(nanoScheduler.isShutdown(), "scheduler should be dead");
}
@Test(expectedExceptions = IllegalStateException.class, timeOut = NANO_SCHEDULE_MAX_RUNTIME)
@Test(enabled = true && ! DEBUG, expectedExceptions = IllegalStateException.class, timeOut = NANO_SCHEDULE_MAX_RUNTIME)
public void testShutdownExecuteFailure() throws InterruptedException {
final NanoScheduler<Integer, Integer, Integer> nanoScheduler = new NanoScheduler<Integer, Integer, Integer>(1, 2);
nanoScheduler.shutdown();
nanoScheduler.execute(exampleTest.makeReader(), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce());
}
@Test(expectedExceptions = NullPointerException.class, timeOut = 10000, invocationCount = 50)
@DataProvider(name = "NanoSchedulerInputExceptionTest")
public Object[][] createNanoSchedulerInputExceptionTest() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final int bufSize : Arrays.asList(100) ) {
for ( final int nThreads : Arrays.asList(8) ) {
for ( final boolean addDelays : Arrays.asList(true, false) ) {
final NanoSchedulerBasicTest test = new NanoSchedulerBasicTest(bufSize, nThreads, 1, 1000000, false);
final int maxN = addDelays ? 10000 : 100000;
for ( int nElementsBeforeError = 0; nElementsBeforeError < maxN; nElementsBeforeError += Math.max(nElementsBeforeError / 10, 1) ) {
tests.add(new Object[]{nElementsBeforeError, test, addDelays});
}
}
}
}
return tests.toArray(new Object[][]{});
}
@Test(enabled = true, expectedExceptions = NullPointerException.class, timeOut = 10000)
public void testInputErrorIsThrown_NPE() throws InterruptedException {
executeTestErrorThrowingInput(new NullPointerException());
executeTestErrorThrowingInput(10, new NullPointerException(), exampleTest, false);
}
@Test(expectedExceptions = ReviewedStingException.class, timeOut = 10000, invocationCount = 50)
@Test(enabled = true, expectedExceptions = ReviewedStingException.class, timeOut = 10000)
public void testInputErrorIsThrown_RSE() throws InterruptedException {
executeTestErrorThrowingInput(new ReviewedStingException("test"));
executeTestErrorThrowingInput(10, new ReviewedStingException("test"), exampleTest, false);
}
private void executeTestErrorThrowingInput(final RuntimeException ex) {
final NanoScheduler<Integer, Integer, Integer> nanoScheduler = new NanoScheduler<Integer, Integer, Integer>(1, 2);
nanoScheduler.execute(new ErrorThrowingIterator(ex), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce());
@Test(enabled = true, expectedExceptions = NullPointerException.class, dataProvider = "NanoSchedulerInputExceptionTest", timeOut = 10000, invocationCount = 1)
public void testInputErrorDoesntDeadlock(final int nElementsBeforeError, final NanoSchedulerBasicTest test, final boolean addDelays ) throws InterruptedException {
executeTestErrorThrowingInput(nElementsBeforeError, new NullPointerException(), test, addDelays);
}
private void executeTestErrorThrowingInput(final int nElementsBeforeError, final RuntimeException ex, final NanoSchedulerBasicTest test, final boolean addDelays) {
logger.warn("executeTestErrorThrowingInput " + nElementsBeforeError + " ex=" + ex + " test=" + test + " addInputDelays=" + addDelays);
final NanoScheduler<Integer, Integer, Integer> nanoScheduler = test.makeScheduler();
nanoScheduler.execute(new ErrorThrowingIterator(nElementsBeforeError, ex, addDelays), test.makeMap(), test.initReduce(), test.makeReduce());
}
private static class ErrorThrowingIterator implements Iterator<Integer> {
final int nElementsBeforeError;
final boolean addDelays;
int i = 0;
final RuntimeException ex;
private ErrorThrowingIterator(RuntimeException ex) {
private ErrorThrowingIterator(final int nElementsBeforeError, RuntimeException ex, boolean addDelays) {
this.nElementsBeforeError = nElementsBeforeError;
this.ex = ex;
this.addDelays = addDelays;
}
@Override public boolean hasNext() { throw ex; }
@Override public Integer next() { throw ex; }
@Override public void remove() { throw ex; }
@Override public boolean hasNext() { return true; }
@Override public Integer next() {
if ( i++ > nElementsBeforeError ) {
throw ex;
} else if ( addDelays ) {
maybeDelayMe(i);
return i;
} else {
return i;
}
}
@Override public void remove() { throw new UnsupportedOperationException("x"); }
}
public static void main(String [ ] args) {

View File

@ -136,7 +136,14 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
"Mills_and_1000G_gold_standard.indels", b37, true, false))
//
// CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT)
//
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf",
"CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false))
//
// example call set for wiki tutorial
//