Merge branch 'master' of gsa2:/humgen/gsa-scr1/chartl/dev/unstable
This commit is contained in:
commit
db8cbb5444
|
|
@ -0,0 +1,197 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class AlleleBiasedDownsamplingUtils {
|
||||
|
||||
/**
|
||||
* Computes an allele biased version of the given pileup
|
||||
*
|
||||
* @param pileup the original pileup
|
||||
* @param downsamplingFraction the fraction of total reads to remove per allele
|
||||
* @param log logging output
|
||||
* @return allele biased pileup
|
||||
*/
|
||||
public static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) {
|
||||
// special case removal of all or no reads
|
||||
if ( downsamplingFraction <= 0.0 )
|
||||
return pileup;
|
||||
if ( downsamplingFraction >= 1.0 )
|
||||
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());
|
||||
|
||||
final ArrayList<PileupElement>[] alleleStratifiedElements = new ArrayList[4];
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
alleleStratifiedElements[i] = new ArrayList<PileupElement>();
|
||||
|
||||
// start by stratifying the reads by the alleles they represent at this position
|
||||
for( final PileupElement pe : pileup ) {
|
||||
// abort if we have a reduced read - we do not want to remove it!
|
||||
if ( pe.getRead().isReducedRead() )
|
||||
return pileup;
|
||||
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
|
||||
if ( baseIndex != -1 )
|
||||
alleleStratifiedElements[baseIndex].add(pe);
|
||||
}
|
||||
|
||||
// Down-sample *each* allele by the contamination fraction applied to the entire pileup.
|
||||
// Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later.
|
||||
int numReadsToRemove = (int)(pileup.getNumberOfElements() * downsamplingFraction); // floor
|
||||
final TreeSet<PileupElement> elementsToKeep = new TreeSet<PileupElement>(new Comparator<PileupElement>() {
|
||||
@Override
|
||||
public int compare(PileupElement element1, PileupElement element2) {
|
||||
final int difference = element1.getRead().getAlignmentStart() - element2.getRead().getAlignmentStart();
|
||||
return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
|
||||
}
|
||||
});
|
||||
|
||||
for ( int i = 0; i < 4; i++ ) {
|
||||
final ArrayList<PileupElement> alleleList = alleleStratifiedElements[i];
|
||||
if ( alleleList.size() <= numReadsToRemove )
|
||||
logAllElements(alleleList, log);
|
||||
else
|
||||
elementsToKeep.addAll(downsampleElements(alleleList, numReadsToRemove, log));
|
||||
}
|
||||
|
||||
// clean up pointers so memory can be garbage collected if needed
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
alleleStratifiedElements[i].clear();
|
||||
|
||||
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>(elementsToKeep));
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs allele biased down-sampling on a pileup and computes the list of elements to keep
|
||||
*
|
||||
* @param elements original list of records
|
||||
* @param numElementsToRemove the number of records to remove
|
||||
* @param log logging output
|
||||
* @return the list of pileup elements TO KEEP
|
||||
*/
|
||||
private static List<PileupElement> downsampleElements(final ArrayList<PileupElement> elements, final int numElementsToRemove, final PrintStream log) {
|
||||
final int pileupSize = elements.size();
|
||||
final BitSet itemsToRemove = new BitSet(pileupSize);
|
||||
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
|
||||
itemsToRemove.set(selectedIndex);
|
||||
}
|
||||
|
||||
ArrayList<PileupElement> elementsToKeep = new ArrayList<PileupElement>(pileupSize - numElementsToRemove);
|
||||
for ( int i = 0; i < pileupSize; i++ ) {
|
||||
if ( itemsToRemove.get(i) )
|
||||
logRead(elements.get(i).getRead(), log);
|
||||
else
|
||||
elementsToKeep.add(elements.get(i));
|
||||
}
|
||||
|
||||
return elementsToKeep;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes reads to remove based on an allele biased down-sampling
|
||||
*
|
||||
* @param alleleReadMap original list of records per allele
|
||||
* @param downsamplingFraction the fraction of total reads to remove per allele
|
||||
* @param log logging output
|
||||
* @return list of reads TO REMOVE from allele biased down-sampling
|
||||
*/
|
||||
public static List<GATKSAMRecord> selectAlleleBiasedReads(final Map<Allele, List<GATKSAMRecord>> alleleReadMap, final double downsamplingFraction, final PrintStream log) {
|
||||
int totalReads = 0;
|
||||
for ( final List<GATKSAMRecord> reads : alleleReadMap.values() )
|
||||
totalReads += reads.size();
|
||||
|
||||
// Down-sample *each* allele by the contamination fraction applied to the entire pileup.
|
||||
int numReadsToRemove = (int)(totalReads * downsamplingFraction);
|
||||
final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numReadsToRemove * alleleReadMap.size());
|
||||
for ( final List<GATKSAMRecord> reads : alleleReadMap.values() ) {
|
||||
if ( reads.size() <= numReadsToRemove ) {
|
||||
readsToRemove.addAll(reads);
|
||||
logAllReads(reads, log);
|
||||
} else {
|
||||
readsToRemove.addAll(downsampleReads(reads, numReadsToRemove, log));
|
||||
}
|
||||
}
|
||||
|
||||
return readsToRemove;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs allele biased down-sampling on a pileup and computes the list of elements to remove
|
||||
*
|
||||
* @param reads original list of records
|
||||
* @param numElementsToRemove the number of records to remove
|
||||
* @param log logging output
|
||||
* @return the list of pileup elements TO REMOVE
|
||||
*/
|
||||
private static List<GATKSAMRecord> downsampleReads(final List<GATKSAMRecord> reads, final int numElementsToRemove, final PrintStream log) {
|
||||
final int pileupSize = reads.size();
|
||||
final BitSet itemsToRemove = new BitSet(pileupSize);
|
||||
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
|
||||
itemsToRemove.set(selectedIndex);
|
||||
}
|
||||
|
||||
ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(pileupSize - numElementsToRemove);
|
||||
for ( int i = 0; i < pileupSize; i++ ) {
|
||||
if ( itemsToRemove.get(i) ) {
|
||||
final GATKSAMRecord read = reads.get(i);
|
||||
readsToRemove.add(read);
|
||||
logRead(read, log);
|
||||
}
|
||||
}
|
||||
|
||||
return readsToRemove;
|
||||
}
|
||||
|
||||
private static void logAllElements(final List<PileupElement> elements, final PrintStream log) {
|
||||
if ( log != null ) {
|
||||
for ( final PileupElement p : elements )
|
||||
logRead(p.getRead(), log);
|
||||
}
|
||||
}
|
||||
|
||||
private static void logAllReads(final List<GATKSAMRecord> reads, final PrintStream log) {
|
||||
if ( log != null ) {
|
||||
for ( final GATKSAMRecord read : reads )
|
||||
logRead(read, log);
|
||||
}
|
||||
}
|
||||
|
||||
private static void logRead(final SAMRecord read, final PrintStream log) {
|
||||
if ( log != null ) {
|
||||
final SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
log.println(String.format("%s\t%s\t%s\t%s", read.getReadName(), readGroup.getSample(), readGroup.getLibrary(), readGroup.getPlatformUnit()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -28,26 +28,22 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.threading.ThreadLocalArray;
|
||||
|
||||
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
|
||||
|
||||
// optimizations: don't reallocate an array each time
|
||||
private byte[] tempQualArray;
|
||||
private boolean[] tempErrorArray;
|
||||
private double[] tempFractionalErrorArray;
|
||||
// optimization: only allocate temp arrays once per thread
|
||||
private final ThreadLocal<byte[]> threadLocalTempQualArray = new ThreadLocalArray<byte[]>(EventType.values().length, byte.class);
|
||||
private final ThreadLocal<boolean[]> threadLocalTempErrorArray = new ThreadLocalArray<boolean[]>(EventType.values().length, boolean.class);
|
||||
private final ThreadLocal<double[]> threadLocalTempFractionalErrorArray = new ThreadLocalArray<double[]>(EventType.values().length, double.class);
|
||||
|
||||
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
|
||||
super.initialize(covariates, recalibrationTables);
|
||||
tempQualArray = new byte[EventType.values().length];
|
||||
tempErrorArray = new boolean[EventType.values().length];
|
||||
tempFractionalErrorArray = new double[EventType.values().length];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -60,10 +56,13 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
|||
* @param refBase The reference base at this locus
|
||||
*/
|
||||
@Override
|
||||
public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
|
||||
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
|
||||
final int offset = pileupElement.getOffset();
|
||||
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
|
||||
|
||||
byte[] tempQualArray = threadLocalTempQualArray.get();
|
||||
boolean[] tempErrorArray = threadLocalTempErrorArray.get();
|
||||
|
||||
tempQualArray[EventType.BASE_SUBSTITUTION.index] = pileupElement.getQual();
|
||||
tempErrorArray[EventType.BASE_SUBSTITUTION.index] = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase);
|
||||
tempQualArray[EventType.BASE_INSERTION.index] = pileupElement.getBaseInsertionQual();
|
||||
|
|
@ -77,40 +76,29 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
|||
final byte qual = tempQualArray[eventIndex];
|
||||
final boolean isError = tempErrorArray[eventIndex];
|
||||
|
||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getReadGroupTable();
|
||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
||||
else
|
||||
rgPreviousDatum.combine(rgThisDatum);
|
||||
// TODO: should this really be combine rather than increment?
|
||||
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
|
||||
|
||||
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getQualityScoreTable();
|
||||
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
|
||||
if (qualPreviousDatum == null)
|
||||
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
|
||||
else
|
||||
qualPreviousDatum.increment(isError);
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
|
||||
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
|
||||
if (covPreviousDatum == null)
|
||||
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
|
||||
else
|
||||
covPreviousDatum.increment(isError);
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
||||
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
||||
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
||||
if( !skip[offset] ) {
|
||||
final ReadCovariates readCovariates = covariateKeySetFrom(read);
|
||||
|
||||
byte[] tempQualArray = threadLocalTempQualArray.get();
|
||||
double[] tempFractionalErrorArray = threadLocalTempFractionalErrorArray.get();
|
||||
|
||||
tempQualArray[EventType.BASE_SUBSTITUTION.index] = read.getBaseQualities()[offset];
|
||||
tempFractionalErrorArray[EventType.BASE_SUBSTITUTION.index] = snpErrors[offset];
|
||||
tempQualArray[EventType.BASE_INSERTION.index] = read.getBaseInsertionQualities()[offset];
|
||||
|
|
@ -124,30 +112,15 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
|||
final byte qual = tempQualArray[eventIndex];
|
||||
final double isError = tempFractionalErrorArray[eventIndex];
|
||||
|
||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getReadGroupTable();
|
||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
||||
else
|
||||
rgPreviousDatum.combine(rgThisDatum);
|
||||
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
|
||||
|
||||
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getQualityScoreTable();
|
||||
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
|
||||
if (qualPreviousDatum == null)
|
||||
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
|
||||
else
|
||||
qualPreviousDatum.increment(1.0, isError);
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
|
||||
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
|
||||
if (covPreviousDatum == null)
|
||||
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
|
||||
else
|
||||
covPreviousDatum.increment(1.0, isError);
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -276,7 +276,10 @@ public class SlidingWindow {
|
|||
|
||||
final int windowHeaderStartLocation = getStartLocation(windowHeader);
|
||||
final int sizeOfMarkedRegion = stop - windowHeaderStartLocation + contextSize + 1;
|
||||
final int lastPositionMarked = markedSites.updateRegion(windowHeaderStartLocation, sizeOfMarkedRegion);
|
||||
|
||||
// copy over as many bits as we can from the previous calculation. Note that we can't trust the
|
||||
// last (contextSize - 1) worth of bits because we may not have actually looked at variant regions there.
|
||||
final int lastPositionMarked = markedSites.updateRegion(windowHeaderStartLocation, sizeOfMarkedRegion) - contextSize - 1;
|
||||
final int locationToProcess = Math.min(lastPositionMarked, stop - contextSize);
|
||||
|
||||
// update the iterator to the correct position
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
|
@ -60,7 +60,7 @@ public class ErrorModel {
|
|||
|
||||
boolean hasCalledAlleles = false;
|
||||
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
|
||||
if (refSampleVC != null) {
|
||||
|
||||
for (Allele allele : refSampleVC.getAlleles()) {
|
||||
|
|
|
|||
|
|
@ -195,7 +195,7 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
|||
final List<Allele> allAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
|
||||
HashMap<String, ErrorModel> perLaneErrorModels = getPerLaneErrorModels(tracker, ref, contexts);
|
||||
if (perLaneErrorModels == null && UAC.referenceSampleName != null)
|
||||
|
|
@ -231,7 +231,7 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
|||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||
if (!perReadAlleleLikelihoodMap.containsKey(sample.getKey())){
|
||||
// no likelihoods have been computed for this sample at this site
|
||||
perReadAlleleLikelihoodMap.put(sample.getKey(), new PerReadAlleleLikelihoodMap());
|
||||
perReadAlleleLikelihoodMap.put(sample.getKey(), org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap());
|
||||
}
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
|
|
@ -333,7 +333,7 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
|||
final boolean useBQAedPileup,
|
||||
final ReferenceContext ref,
|
||||
final boolean ignoreLaneInformation,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap);
|
||||
final org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap);
|
||||
|
||||
protected abstract List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
|||
double[][] readHaplotypeLikelihoods;
|
||||
|
||||
final byte refBase;
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap;
|
||||
final org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap;
|
||||
|
||||
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
|
|
@ -37,7 +37,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
|||
final PairHMMIndelErrorModel pairModel,
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||
final ReferenceContext referenceContext,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
|
||||
final org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
|
||||
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
|
||||
this.pairModel = pairModel;
|
||||
this.haplotypeMap = haplotypeMap;
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends Gener
|
|||
final boolean useBQAedPileup,
|
||||
final ReferenceContext ref,
|
||||
final boolean ignoreLaneInformation,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
|
||||
final org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
|
||||
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends General
|
|||
final boolean useBQAedPileup,
|
||||
final ReferenceContext ref,
|
||||
final boolean ignoreLaneInformation,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
|
||||
final org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
|
||||
return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ public class AFCalcTestBuilder {
|
|||
}
|
||||
|
||||
public AFCalc makeModel() {
|
||||
return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), getNumAltAlleles(), 2);
|
||||
return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), 2);
|
||||
}
|
||||
|
||||
public double[] makePriors() {
|
||||
|
|
|
|||
|
|
@ -40,22 +40,20 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
private final static boolean VERBOSE = false;
|
||||
|
||||
protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||
protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, ploidy);
|
||||
this.ploidy = ploidy;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected VariantContext reduceScope(VariantContext vc) {
|
||||
final int maxAltAlleles = vc.getType().equals(VariantContext.Type.INDEL) ? maxAlternateAllelesForIndels : maxAlternateAllelesToGenotype;
|
||||
|
||||
// don't try to genotype too many alternate alleles
|
||||
if ( vc.getAlternateAlleles().size() > maxAltAlleles) {
|
||||
logger.warn("this tool is currently set to genotype at most " + maxAltAlleles + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||
if ( vc.getAlternateAlleles().size() > getMaxAltAlleles()) {
|
||||
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>(maxAltAlleles + 1);
|
||||
final List<Allele> alleles = new ArrayList<Allele>(getMaxAltAlleles() + 1);
|
||||
alleles.add(vc.getReference());
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, maxAltAlleles, ploidy));
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles(), ploidy));
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
builder.alleles(alleles);
|
||||
|
|
|
|||
|
|
@ -217,7 +217,7 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
cleanUpSymbolicUnassembledEvents( haplotypes );
|
||||
if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 3 ) { // if not in GGA mode and have at least 3 samples try to create MNP and complex events by looking at LD structure
|
||||
if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
|
||||
mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc );
|
||||
}
|
||||
if( !activeAllelesToGenotype.isEmpty() ) { // we are in GGA mode!
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pairhmm.PairHMM;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -424,7 +425,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
|
||||
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
|
||||
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
|
||||
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
|
||||
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
|
||||
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
|||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class LikelihoodCalculationEngine {
|
||||
|
|
@ -139,7 +140,7 @@ public class LikelihoodCalculationEngine {
|
|||
}
|
||||
|
||||
private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) {
|
||||
for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ){
|
||||
for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ) {
|
||||
if( b1[iii] != b2[iii] ) {
|
||||
return iii;
|
||||
}
|
||||
|
|
@ -346,11 +347,13 @@ public class LikelihoodCalculationEngine {
|
|||
public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser,
|
||||
final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList,
|
||||
final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
|
||||
final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
|
||||
final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call,
|
||||
final double downsamplingFraction,
|
||||
final PrintStream downsamplingLog ) {
|
||||
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
|
||||
final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
|
||||
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
|
||||
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
final PerReadAlleleLikelihoodMap likelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
|
||||
|
||||
final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
|
||||
for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
|
||||
|
|
@ -370,6 +373,9 @@ public class LikelihoodCalculationEngine {
|
|||
}
|
||||
}
|
||||
|
||||
// down-sample before adding filtered reads
|
||||
likelihoodMap.performPerAlleleDownsampling(downsamplingFraction, downsamplingLog);
|
||||
|
||||
// add all filtered reads to the NO_CALL list because they weren't given any likelihoods
|
||||
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
|
||||
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
|
||||
|
|
|
|||
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.utils.genotyper;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class AdvancedPerReadAlleleLikelihoodMap extends StandardPerReadAlleleLikelihoodMap implements ProtectedPackageSource {
|
||||
|
||||
public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) {
|
||||
return AlleleBiasedDownsamplingUtils.createAlleleBiasedBasePileup(pileup, downsamplingFraction, log);
|
||||
}
|
||||
|
||||
public void performPerAlleleDownsampling(final double downsamplingFraction, final PrintStream log) {
|
||||
// special case removal of all or no reads
|
||||
if ( downsamplingFraction <= 0.0 )
|
||||
return;
|
||||
if ( downsamplingFraction >= 1.0 ) {
|
||||
likelihoodReadMap.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
// start by stratifying the reads by the alleles they represent at this position
|
||||
final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>(alleles.size());
|
||||
for ( Allele allele : alleles )
|
||||
alleleReadMap.put(allele, new ArrayList<GATKSAMRecord>());
|
||||
|
||||
for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : likelihoodReadMap.entrySet() ) {
|
||||
// do not remove reduced reads!
|
||||
if ( !entry.getKey().isReducedRead() ) {
|
||||
final Allele bestAllele = getMostLikelyAllele(entry.getValue());
|
||||
if ( bestAllele != Allele.NO_CALL )
|
||||
alleleReadMap.get(bestAllele).add(entry.getKey());
|
||||
}
|
||||
}
|
||||
|
||||
// compute the reads to remove and actually remove them
|
||||
final List<GATKSAMRecord> readsToRemove = AlleleBiasedDownsamplingUtils.selectAlleleBiasedReads(alleleReadMap, downsamplingFraction, log);
|
||||
for ( final GATKSAMRecord read : readsToRemove )
|
||||
likelihoodReadMap.remove(read);
|
||||
}
|
||||
}
|
||||
|
|
@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
|
|
@ -73,11 +75,6 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
params.getCommandLine(),
|
||||
Arrays.asList(params.md5));
|
||||
executeTest("testBQSR-"+params.args, spec).getFirst();
|
||||
|
||||
WalkerTestSpec specNT2 = new WalkerTestSpec(
|
||||
params.getCommandLine() + " -nt 2",
|
||||
Arrays.asList(params.md5));
|
||||
executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -123,20 +120,26 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
|
||||
@DataProvider(name = "PRTest")
|
||||
public Object[][] createPRTestData() {
|
||||
return new Object[][]{
|
||||
{new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")},
|
||||
{new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")},
|
||||
{new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")},
|
||||
{new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")}
|
||||
};
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{1, new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")});
|
||||
tests.add(new Object[]{1, new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")});
|
||||
tests.add(new Object[]{1, new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")});
|
||||
|
||||
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
||||
tests.add(new Object[]{nct, new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PRTest")
|
||||
public void testPR(PRTest params) {
|
||||
public void testPR(final int nct, PRTest params) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T PrintReads" +
|
||||
" -R " + hg18Reference +
|
||||
" -I " + privateTestDir + "HiSeq.1mb.1RG.bam" +
|
||||
" -nct " + nct +
|
||||
" -BQSR " + privateTestDir + "HiSeq.20mb.1RG.table" +
|
||||
params.args +
|
||||
" -o %s",
|
||||
|
|
|
|||
|
|
@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testSNP_ACS_Pools() {
|
||||
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","ec19f0b7c7d57493cecfff988a4815c8");
|
||||
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testBOTH_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","9ce24f4ff787aed9d3754519a60ef49f");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","7e5b28c9e21cc7e45c58c41177d8a0fc");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","492c8ba9a80a902097ff15bbeb031592");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ae6c276cc46785a794acff6f7d10ecf7");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","848e1092b5cd57b0da5f1187e67134e7");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","481452ad7d6378cffb5cd834cc621d55");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","51a7b51d82a341adec0e6510f5dfadd8");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","812957e51277aca9925c1a7bb4d9a118");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","0a8c3b06243040b743dd90d497bb3f83");
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","dd568dc30be90135a3a8957a45a7321c");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "f8ea18ec6a717a77fdf8c5f2482d8d8d");
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "bf793c43b635a931207170be8035b288");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -125,7 +125,7 @@ public class AFCalcUnitTest extends BaseTest {
|
|||
final List<Genotype> triAllelicSamples = Arrays.asList(AA2, AB2, BB2, AC2, BC2, CC2);
|
||||
|
||||
for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) {
|
||||
List<AFCalc> calcs = AFCalcFactory.createAFCalcs( Arrays.asList( AFCalcFactory.Calculation.values() ), 4, 2, 2, 2);
|
||||
List<AFCalc> calcs = AFCalcFactory.createAFCalcs( Arrays.asList( AFCalcFactory.Calculation.values() ), 4, 2, 2);
|
||||
|
||||
final int nPriorValues = 2*nSamples+1;
|
||||
final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
||||
|
|
@ -215,7 +215,7 @@ public class AFCalcUnitTest extends BaseTest {
|
|||
samples.addAll(Collections.nCopies(nNonInformative, testData.nonInformative));
|
||||
|
||||
final int nSamples = samples.size();
|
||||
List<AFCalc> calcs = AFCalcFactory.createAFCalcs(Arrays.asList(AFCalcFactory.Calculation.values()), 4, 2, 2, 2);
|
||||
List<AFCalc> calcs = AFCalcFactory.createAFCalcs(Arrays.asList(AFCalcFactory.Calculation.values()), 4, 2, 2);
|
||||
|
||||
final double[] priors = MathUtils.normalizeFromLog10(new double[2*nSamples+1], true); // flat priors
|
||||
|
||||
|
|
|
|||
|
|
@ -140,7 +140,7 @@ public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
|
|||
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
|
||||
double[] priors = new double[len]; // flat priors
|
||||
|
||||
final GeneralPloidyExactAFCalc calc = new GeneralPloidyExactAFCalc(cfg.GLs.size(), 1 + cfg.numAltAlleles, 1 + cfg.numAltAlleles, cfg.ploidy);
|
||||
final GeneralPloidyExactAFCalc calc = new GeneralPloidyExactAFCalc(cfg.GLs.size(), 1 + cfg.numAltAlleles, cfg.ploidy);
|
||||
calc.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors);
|
||||
int nameIndex = 1;
|
||||
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||
|
|
|
|||
|
|
@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "ee866a8694a6f6c77242041275350ab9");
|
||||
HCTest(CEUTRIO_BAM, "", "aa1df35d6e64d7ca93feb4d2dd15dd0e");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "01367428c26d3eaf9297c58bf8677dd3");
|
||||
HCTest(NA12878_BAM, "", "186c7f322978283c01249c6de2829215");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles_for_indels 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "53caa950535749f99d3c5b9bb61c7b60");
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "de9e78a52207fe62144dba5337965469");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(CEUTRIO_BAM, "", "30598abeeb0b0ae5816ffdbf0c4044fd");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "000dbb1b48f94d017cfec127c6cabe8f");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "b4ea70a446e4782bd3700ca14dd726ff");
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a");
|
||||
}
|
||||
|
||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||
|
|
@ -64,20 +64,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "b369c2a6cb5c99a424551b33bae16f3b");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c54c0c9411054bf629bfd98b616e53fc"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c642dcd93771f6f084d55de31f180d1b"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6c67ee8e99cc8f53a6587bb26028047"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
@ -91,9 +91,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("79af83432dc4a1768b3ebffffc4d2b8f"));
|
||||
Arrays.asList("4beb9f87ab3f316a9384c3d0dca6ebe9"));
|
||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* A GenomeAnalysisEngine that runs a specified walker.
|
||||
|
|
@ -73,6 +74,7 @@ public class GenomeAnalysisEngine {
|
|||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
private static Logger logger = Logger.getLogger(GenomeAnalysisEngine.class);
|
||||
public static final long NO_RUNTIME_LIMIT = -1;
|
||||
|
||||
/**
|
||||
* The GATK command-line argument parsing code.
|
||||
|
|
@ -1090,6 +1092,33 @@ public class GenomeAnalysisEngine {
|
|||
public String createApproximateCommandLineArgumentString(Object... argumentProviders) {
|
||||
return CommandLineUtils.createApproximateCommandLineArgumentString(parsingEngine,argumentProviders);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Does the current runtime in unit exceed the runtime limit, if one has been provided?
|
||||
*
|
||||
* @param runtime the runtime of this GATK instance in minutes
|
||||
* @param unit the time unit of runtime
|
||||
* @return false if not limit was requested or if runtime <= the limit, true otherwise
|
||||
*/
|
||||
public boolean exceedsRuntimeLimit(final long runtime, final TimeUnit unit) {
|
||||
if ( runtime < 0 ) throw new IllegalArgumentException("runtime must be >= 0 but got " + runtime);
|
||||
|
||||
if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT )
|
||||
return false;
|
||||
else {
|
||||
final long actualRuntimeNano = TimeUnit.NANOSECONDS.convert(runtime, unit);
|
||||
final long maxRuntimeNano = getRuntimeLimitInNanoseconds();
|
||||
return actualRuntimeNano > maxRuntimeNano;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the runtime limit in nanoseconds, or -1 if no limit was specified
|
||||
*/
|
||||
public long getRuntimeLimitInNanoseconds() {
|
||||
if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT )
|
||||
return -1;
|
||||
else
|
||||
return TimeUnit.NANOSECONDS.convert(getArguments().maxRuntime, getArguments().maxRuntimeUnits);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
|
|
@ -44,6 +45,7 @@ import java.io.File;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
|
|
@ -91,7 +93,7 @@ public class GATKArgumentCollection {
|
|||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// XXX
|
||||
// General features
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
|
|
@ -143,6 +145,12 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "disableRandomization",doc="Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator.")
|
||||
public boolean disableRandomization = false;
|
||||
|
||||
@Argument(fullName = "maxRuntime", shortName = "maxRuntime", doc="If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure. By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits", required = false)
|
||||
public long maxRuntime = GenomeAnalysisEngine.NO_RUNTIME_LIMIT;
|
||||
|
||||
@Argument(fullName = "maxRuntimeUnits", shortName = "maxRuntimeUnits", doc="The TimeUnit for maxRuntime", required = false)
|
||||
public TimeUnit maxRuntimeUnits = TimeUnit.MINUTES;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Downsampling Arguments
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
|
|
@ -55,29 +56,32 @@ public class StandardCallerArgumentCollection {
|
|||
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
||||
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
||||
* that you not play around with this parameter.
|
||||
*
|
||||
* As of GATK 2.2 the genotyper can handle a very large number of events, so the default maximum has been increased to 6.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||
public int MAX_ALTERNATE_ALLELES = 3;
|
||||
public int MAX_ALTERNATE_ALLELES = 6;
|
||||
|
||||
/**
|
||||
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
|
||||
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
||||
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
||||
* that you not play around with this parameter.
|
||||
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "max_alternate_alleles_for_indels", shortName = "maxAltAllelesForIndels", doc = "Maximum number of alternate alleles to genotype for indels only", required = false)
|
||||
public int MAX_ALTERNATE_ALLELES_FOR_INDELS = 2;
|
||||
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
|
||||
public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel();
|
||||
|
||||
/**
|
||||
* If this fraction is greater is than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads.
|
||||
* Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we
|
||||
* will try to remove (N * contamination fraction) bases for each alternate allele.
|
||||
*/
|
||||
@Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false)
|
||||
public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION;
|
||||
public static final double DEFAULT_CONTAMINATION_FRACTION = 0.05;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "contamination_percentage_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false)
|
||||
public double CONTAMINATION_PERCENTAGE = 0.0;
|
||||
@Argument(fullName = "logRemovedReadsFromContaminationFiltering", shortName="contaminationLog", required=false)
|
||||
public PrintStream contaminationLog = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName = "logExactCalls", doc="x", required=false)
|
||||
|
|
@ -91,19 +95,12 @@ public class StandardCallerArgumentCollection {
|
|||
this.GenotypingMode = SCAC.GenotypingMode;
|
||||
this.heterozygosity = SCAC.heterozygosity;
|
||||
this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
|
||||
this.MAX_ALTERNATE_ALLELES_FOR_INDELS = SCAC.MAX_ALTERNATE_ALLELES_FOR_INDELS;
|
||||
this.OutputMode = SCAC.OutputMode;
|
||||
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
|
||||
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
|
||||
this.CONTAMINATION_PERCENTAGE = SCAC.CONTAMINATION_PERCENTAGE;
|
||||
this.CONTAMINATION_FRACTION = SCAC.CONTAMINATION_FRACTION;
|
||||
this.contaminationLog = SCAC.contaminationLog;
|
||||
this.exactCallsLog = SCAC.exactCallsLog;
|
||||
this.AFmodel = SCAC.AFmodel;
|
||||
}
|
||||
|
||||
/**
|
||||
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
|
||||
public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
final ReduceTree reduceTree = new ReduceTree(this);
|
||||
initializeWalker(walker);
|
||||
|
||||
while (isShardTraversePending() || isTreeReducePending()) {
|
||||
while (! abortExecution() && (isShardTraversePending() || isTreeReducePending())) {
|
||||
// Check for errors during execution.
|
||||
errorTracker.throwErrorIfPending();
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
|
||||
final TraversalEngine traversalEngine = borrowTraversalEngine(this);
|
||||
for (Shard shard : shardStrategy ) {
|
||||
if ( done || shard == null ) // we ran out of shards that aren't owned
|
||||
if ( abortExecution() || done || shard == null ) // we ran out of shards that aren't owned
|
||||
break;
|
||||
|
||||
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.gatk.traversals.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -52,6 +53,7 @@ import javax.management.ObjectName;
|
|||
import java.io.File;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -74,8 +76,6 @@ import java.util.*;
|
|||
*
|
||||
*/
|
||||
public abstract class MicroScheduler implements MicroSchedulerMBean {
|
||||
// TODO -- remove me and retire non nano scheduled versions of traversals
|
||||
private final static boolean USE_NANOSCHEDULER_FOR_EVERYTHING = true;
|
||||
protected static final Logger logger = Logger.getLogger(MicroScheduler.class);
|
||||
|
||||
/**
|
||||
|
|
@ -238,15 +238,9 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
@Ensures("result != null")
|
||||
private TraversalEngine createTraversalEngine(final Walker walker, final ThreadAllocation threadAllocation) {
|
||||
if (walker instanceof ReadWalker) {
|
||||
if ( USE_NANOSCHEDULER_FOR_EVERYTHING || threadAllocation.getNumCPUThreadsPerDataThread() > 1 )
|
||||
return new TraverseReadsNano(threadAllocation.getNumCPUThreadsPerDataThread());
|
||||
else
|
||||
return new TraverseReads();
|
||||
return new TraverseReadsNano(threadAllocation.getNumCPUThreadsPerDataThread());
|
||||
} else if (walker instanceof LocusWalker) {
|
||||
if ( USE_NANOSCHEDULER_FOR_EVERYTHING || threadAllocation.getNumCPUThreadsPerDataThread() > 1 )
|
||||
return new TraverseLociNano(threadAllocation.getNumCPUThreadsPerDataThread());
|
||||
else
|
||||
return new TraverseLociLinear();
|
||||
return new TraverseLociNano(threadAllocation.getNumCPUThreadsPerDataThread());
|
||||
} else if (walker instanceof DuplicateWalker) {
|
||||
return new TraverseDuplicates();
|
||||
} else if (walker instanceof ReadPairWalker) {
|
||||
|
|
@ -277,6 +271,26 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
this.threadEfficiencyMonitor = threadEfficiencyMonitor;
|
||||
}
|
||||
|
||||
/**
|
||||
* Should we stop all execution work and exit gracefully?
|
||||
*
|
||||
* Returns true in the case where some external signal or time limit has been received, indicating
|
||||
* that this GATK shouldn't continue executing. This isn't a kill signal, it is really a "shutdown
|
||||
* gracefully at the next opportunity" signal. Concrete implementations of the MicroScheduler
|
||||
* examine this value as often as reasonable and, if it returns true, stop what they are doing
|
||||
* at the next available opportunity, shutdown their resources, call notify done, and return.
|
||||
*
|
||||
* @return true if we should abort execution, or false otherwise
|
||||
*/
|
||||
protected boolean abortExecution() {
|
||||
final boolean abort = engine.exceedsRuntimeLimit(progressMeter.getRuntimeInNanoseconds(), TimeUnit.NANOSECONDS);
|
||||
if ( abort ) {
|
||||
final AutoFormattingTime aft = new AutoFormattingTime(TimeUnit.SECONDS.convert(engine.getRuntimeLimitInNanoseconds(), TimeUnit.NANOSECONDS), 1, 4);
|
||||
logger.info("Aborting execution (cleanly) because the runtime has exceeded the requested maximum " + aft);
|
||||
}
|
||||
return abort;
|
||||
}
|
||||
|
||||
/**
|
||||
* Walks a walker over the given list of intervals.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -151,7 +151,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
? new VariantContextWriterStub(engine, writerFile, argumentSources)
|
||||
: new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
|
||||
|
||||
stub.setCompressed(isCompressed(writerFileName.asString()));
|
||||
stub.setCompressed(isCompressed(writerFileName == null ? null: writerFileName.asString()));
|
||||
stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches));
|
||||
stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches));
|
||||
stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches));
|
||||
|
|
|
|||
|
|
@ -1,103 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
|
||||
/**
|
||||
* A simple solution to iterating over all reference positions over a series of genomic locations.
|
||||
*/
|
||||
public abstract class TraverseLociBase<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
@Override
|
||||
public final String getTraversalUnits() {
|
||||
return "sites";
|
||||
}
|
||||
|
||||
protected static class TraverseResults<T> {
|
||||
final int numIterations;
|
||||
final T reduceResult;
|
||||
|
||||
public TraverseResults(int numIterations, T reduceResult) {
|
||||
this.numIterations = numIterations;
|
||||
this.reduceResult = reduceResult;
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract TraverseResults<T> traverse( final LocusWalker<M,T> walker,
|
||||
final LocusView locusView,
|
||||
final LocusReferenceView referenceView,
|
||||
final ReferenceOrderedView referenceOrderedDataView,
|
||||
final T sum);
|
||||
|
||||
@Override
|
||||
public T traverse( LocusWalker<M,T> walker,
|
||||
LocusShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("TraverseLociBase.traverse: Shard is %s", dataProvider));
|
||||
|
||||
final LocusView locusView = getLocusView( walker, dataProvider );
|
||||
|
||||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = null;
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
referenceOrderedDataView = (RodLocusView)locusView;
|
||||
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
|
||||
final TraverseResults<T> result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum );
|
||||
sum = result.reduceResult;
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations);
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
}
|
||||
|
||||
// We have a final map call to execute here to clean up the skipped based from the
|
||||
// last position in the ROD to that in the interval
|
||||
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
|
||||
// only do this if the walker isn't done!
|
||||
final RodLocusView rodLocusView = (RodLocusView)locusView;
|
||||
final long nSkipped = rodLocusView.getLastSkippedBases();
|
||||
if ( nSkipped > 0 ) {
|
||||
final GenomeLoc site = rodLocusView.getLocOneBeyondShard();
|
||||
final AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped);
|
||||
final M x = walker.map(null, null, ac);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the best view of loci for this walker given the available data. The view will function as a 'trigger track'
|
||||
* of sorts, providing a consistent interface so that TraverseLociBase doesn't need to be reimplemented for any new datatype
|
||||
* that comes along.
|
||||
* @param walker walker to interrogate.
|
||||
* @param dataProvider Data which which to drive the locus view.
|
||||
* @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
|
||||
*/
|
||||
private LocusView getLocusView( Walker<M,T> walker, LocusShardDataProvider dataProvider ) {
|
||||
final DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
||||
if( dataSource == DataSource.READS )
|
||||
return new CoveredLocusView(dataProvider);
|
||||
else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
|
||||
return new AllLocusView(dataProvider);
|
||||
else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new RodLocusView(dataProvider);
|
||||
else
|
||||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
* A simple solution to iterating over all reference positions over a series of genomic locations.
|
||||
*/
|
||||
public class TraverseLociLinear<M,T> extends TraverseLociBase<M,T> {
|
||||
|
||||
@Override
|
||||
protected TraverseResults<T> traverse(LocusWalker<M, T> walker, LocusView locusView, LocusReferenceView referenceView, ReferenceOrderedView referenceOrderedDataView, T sum) {
|
||||
// We keep processing while the next reference location is within the interval
|
||||
boolean done = false;
|
||||
int numIterations = 0;
|
||||
|
||||
while( locusView.hasNext() && ! done ) {
|
||||
numIterations++;
|
||||
final AlignmentContext locus = locusView.next();
|
||||
final GenomeLoc location = locus.getLocation();
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
final boolean keepMeP = walker.filter(tracker, refContext, locus);
|
||||
if (keepMeP) {
|
||||
final M x = walker.map(tracker, refContext, locus);
|
||||
sum = walker.reduce(x, sum);
|
||||
done = walker.isDone();
|
||||
}
|
||||
|
||||
printProgress(locus.getLocation());
|
||||
}
|
||||
|
||||
return new TraverseResults<T>(numIterations, sum);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,24 +1,26 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NSProgressFunction;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* A simple solution to iterating over all reference positions over a series of genomic locations.
|
||||
*/
|
||||
public class TraverseLociNano<M,T> extends TraverseLociBase<M,T> {
|
||||
public class TraverseLociNano<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static final boolean DEBUG = false;
|
||||
|
||||
|
|
@ -30,6 +32,81 @@ public class TraverseLociNano<M,T> extends TraverseLociBase<M,T> {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final String getTraversalUnits() {
|
||||
return "sites";
|
||||
}
|
||||
|
||||
protected static class TraverseResults<T> {
|
||||
final int numIterations;
|
||||
final T reduceResult;
|
||||
|
||||
public TraverseResults(int numIterations, T reduceResult) {
|
||||
this.numIterations = numIterations;
|
||||
this.reduceResult = reduceResult;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public T traverse( LocusWalker<M,T> walker,
|
||||
LocusShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
|
||||
|
||||
final LocusView locusView = getLocusView( walker, dataProvider );
|
||||
|
||||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = null;
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
referenceOrderedDataView = (RodLocusView)locusView;
|
||||
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
|
||||
final TraverseResults<T> result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum );
|
||||
sum = result.reduceResult;
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations);
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
}
|
||||
|
||||
// We have a final map call to execute here to clean up the skipped based from the
|
||||
// last position in the ROD to that in the interval
|
||||
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
|
||||
// only do this if the walker isn't done!
|
||||
final RodLocusView rodLocusView = (RodLocusView)locusView;
|
||||
final long nSkipped = rodLocusView.getLastSkippedBases();
|
||||
if ( nSkipped > 0 ) {
|
||||
final GenomeLoc site = rodLocusView.getLocOneBeyondShard();
|
||||
final AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped);
|
||||
final M x = walker.map(null, null, ac);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the best view of loci for this walker given the available data. The view will function as a 'trigger track'
|
||||
* of sorts, providing a consistent interface so that TraverseLoci doesn't need to be reimplemented for any new datatype
|
||||
* that comes along.
|
||||
* @param walker walker to interrogate.
|
||||
* @param dataProvider Data which which to drive the locus view.
|
||||
* @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
|
||||
*/
|
||||
private LocusView getLocusView( Walker<M,T> walker, LocusShardDataProvider dataProvider ) {
|
||||
final DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
||||
if( dataSource == DataSource.READS )
|
||||
return new CoveredLocusView(dataProvider);
|
||||
else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
|
||||
return new AllLocusView(dataProvider);
|
||||
else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new RodLocusView(dataProvider);
|
||||
else
|
||||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||
}
|
||||
|
||||
protected TraverseResults<T> traverse(final LocusWalker<M, T> walker,
|
||||
final LocusView locusView,
|
||||
final LocusReferenceView referenceView,
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
|
|||
public T traverse(ReadPairWalker<M, T> walker,
|
||||
ReadShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("TraverseReads.traverse Covered dataset is %s", dataProvider));
|
||||
logger.debug(String.format("TraverseReadsPairs.traverse Covered dataset is %s", dataProvider));
|
||||
|
||||
if( !dataProvider.hasReads() )
|
||||
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
|
||||
|
|
|
|||
|
|
@ -1,111 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 24, 2009
|
||||
* <p/>
|
||||
* Class TraverseReads
|
||||
* <p/>
|
||||
* This class handles traversing by reads in the new shardable style
|
||||
*/
|
||||
public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static final Logger logger = Logger.getLogger(TraverseReads.class);
|
||||
|
||||
@Override
|
||||
public String getTraversalUnits() {
|
||||
return "reads";
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverse by reads, given the data and the walker
|
||||
*
|
||||
* @param walker the walker to traverse with
|
||||
* @param dataProvider the provider of the reads data
|
||||
* @param sum the value of type T, specified by the walker, to feed to the walkers reduce function
|
||||
* @return the reduce variable of the read walker
|
||||
*/
|
||||
public T traverse(ReadWalker<M,T> walker,
|
||||
ReadShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
|
||||
logger.debug(String.format("TraverseReads.traverse Covered dataset is %s", dataProvider));
|
||||
|
||||
if( !dataProvider.hasReads() )
|
||||
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
|
||||
|
||||
final ReadView reads = new ReadView(dataProvider);
|
||||
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
|
||||
|
||||
// get the reference ordered data
|
||||
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
|
||||
|
||||
boolean done = walker.isDone();
|
||||
// while we still have more reads
|
||||
for (final SAMRecord read : reads) {
|
||||
if ( done ) break;
|
||||
|
||||
// ReferenceContext -- the reference bases covered by the read
|
||||
final ReferenceContext refContext = ! read.getReadUnmappedFlag() && dataProvider.hasReference()
|
||||
? reference.getReferenceContext(read)
|
||||
: null;
|
||||
|
||||
// update the number of reads we've seen
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
// if the read is mapped, create a metadata tracker
|
||||
final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0 ? rodView.getReferenceOrderedDataForRead(read) : null;
|
||||
|
||||
final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
|
||||
if (keepMeP) {
|
||||
M x = walker.map(refContext, (GATKSAMRecord) read, tracker); // the tracker can be null
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
final GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
|
||||
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
printProgress(locus);
|
||||
|
||||
done = walker.isDone();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
|
@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -1,14 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -34,13 +34,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -32,8 +32,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -8,12 +8,10 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.IndelUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -1,14 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -7,11 +7,9 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
|
|||
|
|
@ -7,16 +7,14 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -7,15 +7,13 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.List;
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ import java.util.ArrayList;
|
|||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
|
||||
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
|
||||
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long>, NanoSchedulable {
|
||||
public class BaseRecalibrator extends LocusWalker<Long, Long> {
|
||||
|
||||
@ArgumentCollection
|
||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
|
||||
|
|
@ -277,11 +277,6 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
return sum;
|
||||
}
|
||||
|
||||
public Long treeReduce(Long sum1, Long sum2) {
|
||||
sum1 += sum2;
|
||||
return sum1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Long result) {
|
||||
logger.info("Calculating quantized quality scores...");
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
|||
* @param refBase The reference base at this locus
|
||||
*/
|
||||
@Override
|
||||
public synchronized void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
|
||||
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
|
||||
final int offset = pileupElement.getOffset();
|
||||
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
|
||||
|
||||
|
|
@ -65,35 +65,21 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
|||
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
|
||||
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
|
||||
|
||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getReadGroupTable();
|
||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
||||
else
|
||||
rgPreviousDatum.combine(rgThisDatum);
|
||||
// TODO: should this really be combine rather than increment?
|
||||
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
|
||||
|
||||
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getQualityScoreTable();
|
||||
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
|
||||
if (qualPreviousDatum == null)
|
||||
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
|
||||
else
|
||||
qualPreviousDatum.increment(isError);
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
|
||||
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
|
||||
if (covPreviousDatum == null)
|
||||
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
|
||||
else
|
||||
covPreviousDatum.increment(isError);
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
||||
public void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
||||
throw new UnsupportedOperationException("Delocalized BQSR is not available in the GATK-lite version");
|
||||
}
|
||||
|
||||
|
|
@ -121,4 +107,133 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
|||
protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) {
|
||||
return (ReadCovariates) read.getTemporaryAttribute(BaseRecalibrator.COVARS_ATTRIBUTE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
||||
* if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error status for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final boolean isError, final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
|
||||
// There are three cases here:
|
||||
//
|
||||
// 1. The original get succeeded (existingDatum != null) because there already was an item in this position.
|
||||
// In this case we can just increment the existing item.
|
||||
//
|
||||
// 2. The original get failed (existingDatum == null), and we successfully put a new item in this position
|
||||
// and are done.
|
||||
//
|
||||
// 3. The original get failed (existingDatum == null), AND the put fails because another thread comes along
|
||||
// in the interim and puts an item in this position. In this case we need to do another get after the
|
||||
// failed put to get the item the other thread put here and increment it.
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
|
||||
table.get(keys).increment(isError);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so increment it
|
||||
existingDatum.increment(isError);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
||||
* if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error value for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final double isError, final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
|
||||
table.get(keys).increment(1.0, isError);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so increment it
|
||||
existingDatum.increment(1.0, isError);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a
|
||||
* new item there if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error status for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
protected void combineDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final boolean isError, final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
final RecalDatum newDatum = createDatumObject(qual, isError);
|
||||
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(newDatum, keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and combine it with our item (item is guaranteed to exist at this point)
|
||||
table.get(keys).combine(newDatum);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so combine it with our item
|
||||
existingDatum.combine(newDatum);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a
|
||||
* new item there if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error value for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
protected void combineDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final double isError, final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
final RecalDatum newDatum = createDatumObject(qual, isError);
|
||||
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(newDatum, keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and combine it with our item (item is guaranteed to exist at this point)
|
||||
table.get(keys).combine(newDatum);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so combine it with our item
|
||||
existingDatum.combine(newDatum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
final List<Allele> allAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap);
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap);
|
||||
|
||||
|
||||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -81,7 +82,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
final List<Allele> allAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
|
||||
GenomeLoc loc = ref.getLocus();
|
||||
// if (!ref.getLocus().equals(lastSiteVisited)) {
|
||||
|
|
@ -118,12 +119,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
if (!perReadAlleleLikelihoodMap.containsKey(sample.getKey())){
|
||||
// no likelihoods have been computed for this sample at this site
|
||||
perReadAlleleLikelihoodMap.put(sample.getKey(), new PerReadAlleleLikelihoodMap());
|
||||
perReadAlleleLikelihoodMap.put(sample.getKey(), PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap());
|
||||
}
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
if (pileup != null) {
|
||||
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
|
||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()));
|
||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.CONTAMINATION_FRACTION, UAC.contaminationLog);
|
||||
b.PL(genotypeLikelihoods);
|
||||
b.DP(getFilteredDepth(pileup));
|
||||
genotypes.add(b.make());
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
|
|
@ -48,13 +49,13 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
private final boolean useAlleleFromVCF;
|
||||
|
||||
private final double[] likelihoodSums = new double[4];
|
||||
private final ArrayList<PileupElement>[] alleleStratifiedElements = new ArrayList[4];
|
||||
|
||||
private final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap;
|
||||
|
||||
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC, logger);
|
||||
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
alleleStratifiedElements[i] = new ArrayList<PileupElement>();
|
||||
perReadAlleleLikelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
|
||||
}
|
||||
|
||||
public VariantContext getLikelihoods(final RefMetaDataTracker tracker,
|
||||
|
|
@ -64,9 +65,9 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
final List<Allele> allAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final Map<String, PerReadAlleleLikelihoodMap> sampleLikelihoodMap) {
|
||||
|
||||
perReadAlleleLikelihoodMap.clear(); // not used in SNP model, sanity check to delete any older data
|
||||
sampleLikelihoodMap.clear(); // not used in SNP model, sanity check to delete any older data
|
||||
|
||||
final byte refBase = ref.getBase();
|
||||
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
|
||||
|
|
@ -79,8 +80,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
ArrayList<SampleGenotypeData> GLs = new ArrayList<SampleGenotypeData>(contexts.size());
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||
if ( UAC.CONTAMINATION_PERCENTAGE > 0.0 )
|
||||
pileup = createDecontaminatedPileup(pileup, UAC.CONTAMINATION_PERCENTAGE);
|
||||
if ( UAC.CONTAMINATION_FRACTION > 0.0 )
|
||||
pileup = perReadAlleleLikelihoodMap.createPerAlleleDownsampledBasePileup(pileup, UAC.CONTAMINATION_FRACTION, UAC.contaminationLog);
|
||||
if ( useBAQedPileup )
|
||||
pileup = createBAQedPileup(pileup);
|
||||
|
||||
|
|
@ -203,42 +204,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
return allelesToUse;
|
||||
}
|
||||
|
||||
public ReadBackedPileup createDecontaminatedPileup(final ReadBackedPileup pileup, final double contaminationPercentage) {
|
||||
// special case removal of all reads
|
||||
if ( contaminationPercentage >= 1.0 )
|
||||
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());
|
||||
|
||||
// start by stratifying the reads by the alleles they represent at this position
|
||||
for( final PileupElement pe : pileup ) {
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
|
||||
if ( baseIndex != -1 )
|
||||
alleleStratifiedElements[baseIndex].add(pe);
|
||||
}
|
||||
|
||||
// Down-sample *each* allele by the contamination fraction applied to the entire pileup.
|
||||
// Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later.
|
||||
int numReadsToRemove = (int)Math.ceil((double)pileup.getNumberOfElements() * contaminationPercentage);
|
||||
final TreeSet<PileupElement> elementsToKeep = new TreeSet<PileupElement>(new Comparator<PileupElement>() {
|
||||
@Override
|
||||
public int compare(PileupElement element1, PileupElement element2) {
|
||||
final int difference = element1.getRead().getAlignmentStart() - element2.getRead().getAlignmentStart();
|
||||
return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
|
||||
}
|
||||
});
|
||||
|
||||
for ( int i = 0; i < 4; i++ ) {
|
||||
final ArrayList<PileupElement> alleleList = alleleStratifiedElements[i];
|
||||
if ( alleleList.size() > numReadsToRemove )
|
||||
elementsToKeep.addAll(downsampleElements(alleleList, numReadsToRemove));
|
||||
}
|
||||
|
||||
// clean up pointers so memory can be garbage collected if needed
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
alleleStratifiedElements[i].clear();
|
||||
|
||||
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>(elementsToKeep));
|
||||
}
|
||||
|
||||
public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) {
|
||||
final List<PileupElement> BAQedElements = new ArrayList<PileupElement>();
|
||||
for( final PileupElement PE : pileup ) {
|
||||
|
|
@ -257,22 +222,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); }
|
||||
}
|
||||
|
||||
private List<PileupElement> downsampleElements(final ArrayList<PileupElement> elements, final int numElementsToRemove) {
|
||||
final int pileupSize = elements.size();
|
||||
final BitSet itemsToRemove = new BitSet(pileupSize);
|
||||
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
|
||||
itemsToRemove.set(selectedIndex);
|
||||
}
|
||||
|
||||
ArrayList<PileupElement> elementsToKeep = new ArrayList<PileupElement>(pileupSize - numElementsToRemove);
|
||||
for ( int i = 0; i < pileupSize; i++ ) {
|
||||
if ( !itemsToRemove.get(i) )
|
||||
elementsToKeep.add(elements.get(i));
|
||||
}
|
||||
|
||||
return elementsToKeep;
|
||||
}
|
||||
|
||||
private static class SampleGenotypeData {
|
||||
|
||||
public final String name;
|
||||
|
|
|
|||
|
|
@ -47,8 +47,8 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
/**
|
||||
* Note that calculating the SLOD increases the runtime by an appreciable amount.
|
||||
*/
|
||||
@Argument(fullName = "noSLOD", shortName = "nosl", doc = "If provided, we will not calculate the SLOD", required = false)
|
||||
public boolean NO_SLOD = false;
|
||||
@Argument(fullName = "computeSLOD", shortName = "slod", doc = "If provided, we will calculate the SLOD (SB annotation)", required = false)
|
||||
public boolean COMPUTE_SLOD = false;
|
||||
|
||||
/**
|
||||
* Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping.
|
||||
|
|
@ -204,7 +204,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
this.GLmodel = uac.GLmodel;
|
||||
this.AFmodel = uac.AFmodel;
|
||||
this.PCR_error = uac.PCR_error;
|
||||
this.NO_SLOD = uac.NO_SLOD;
|
||||
this.COMPUTE_SLOD = uac.COMPUTE_SLOD;
|
||||
this.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED;
|
||||
this.MIN_BASE_QUALTY_SCORE = uac.MIN_BASE_QUALTY_SCORE;
|
||||
this.MAX_DELETION_FRACTION = uac.MAX_DELETION_FRACTION;
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
|
|
@ -234,36 +235,26 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY ||
|
||||
UAC.referenceSampleName != null ||
|
||||
UAC.referenceSampleRod.isBound()) {
|
||||
throw new UserException.NotSupportedInGATKLite("Usage of ploidy values different than 2 not supported in this GATK version");
|
||||
throw new UserException.NotSupportedInGATKLite("you cannot enable usage of ploidy values other than 2");
|
||||
}
|
||||
|
||||
if ( UAC.CONTAMINATION_FRACTION > 0.0 ) {
|
||||
if ( UAC.CONTAMINATION_FRACTION == StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION ) {
|
||||
UAC.CONTAMINATION_FRACTION = 0.0;
|
||||
logger.warn("setting contamination down-sampling fraction to 0.0 because it is not enabled in GATK-lite");
|
||||
} else {
|
||||
throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( UAC.TREAT_ALL_READS_AS_SINGLE_POOL ) {
|
||||
samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
|
||||
} else {
|
||||
// get all of the unique sample names
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
|
||||
} else {
|
||||
// in full mode: check for consistency in ploidy/pool calling arguments
|
||||
// check for correct calculation models
|
||||
/* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
|
||||
// polyploidy requires POOL GL and AF calculation models to be specified right now
|
||||
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
|
||||
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
|
||||
throw new UserException("Incorrect genotype calculation model chosen. Only [POOLSNP|POOLINDEL|POOLBOTH] supported with this walker if sample ploidy != 2");
|
||||
}
|
||||
|
||||
if (UAC.AFmodel != AFCalc.Model.POOL)
|
||||
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
|
||||
|
||||
}
|
||||
*/
|
||||
// get all of the unique sample names
|
||||
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
|
||||
samples.clear();
|
||||
samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
|
||||
} else {
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
if (UAC.referenceSampleName != null )
|
||||
samples.remove(UAC.referenceSampleName);
|
||||
}
|
||||
|
||||
if ( UAC.referenceSampleName != null )
|
||||
samples.remove(UAC.referenceSampleName);
|
||||
}
|
||||
|
||||
// check for a bad max alleles value
|
||||
|
|
@ -305,7 +296,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
|
||||
|
||||
// annotation (INFO) fields from UnifiedGenotyper
|
||||
if ( !UAC.NO_SLOD )
|
||||
if ( UAC.COMPUTE_SLOD )
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.STRAND_BIAS_KEY);
|
||||
|
||||
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
|
||||
|
|
|
|||
|
|
@ -153,19 +153,19 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compute full calls at a given locus. Entry point for engine calls from the UnifiedGenotyper.
|
||||
*
|
||||
* If allSamples != null, then the output variantCallContext is guarenteed to contain a genotype
|
||||
* for every sample in allSamples. If it's null there's no such guarentee. Providing this
|
||||
* argument is critical when the resulting calls will be written to a VCF file.
|
||||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param refContext the reference base
|
||||
* @param rawContext contextual information around the locus
|
||||
* @param allSamples set of all sample names that we might call (i.e., those in the VCF header)
|
||||
* @return the VariantCallContext object
|
||||
*/
|
||||
/**
|
||||
* Compute full calls at a given locus. Entry point for engine calls from the UnifiedGenotyper.
|
||||
*
|
||||
* If allSamples != null, then the output variantCallContext is guarenteed to contain a genotype
|
||||
* for every sample in allSamples. If it's null there's no such guarentee. Providing this
|
||||
* argument is critical when the resulting calls will be written to a VCF file.
|
||||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param refContext the reference base
|
||||
* @param rawContext contextual information around the locus
|
||||
* @param allSamples set of all sample names that we might call (i.e., those in the VCF header)
|
||||
* @return the VariantCallContext object
|
||||
*/
|
||||
public List<VariantCallContext> calculateLikelihoodsAndGenotypes(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext refContext,
|
||||
final AlignmentContext rawContext,
|
||||
|
|
@ -174,7 +174,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
||||
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap = new HashMap<String,PerReadAlleleLikelihoodMap>();
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap = new HashMap<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap>();
|
||||
|
||||
if ( models.isEmpty() ) {
|
||||
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
|
||||
|
|
@ -209,7 +209,7 @@ public class UnifiedGenotyperEngine {
|
|||
public VariantContext calculateLikelihoods(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext refContext,
|
||||
final AlignmentContext rawContext,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
||||
if ( models.isEmpty() ) {
|
||||
return null;
|
||||
|
|
@ -275,7 +275,7 @@ public class UnifiedGenotyperEngine {
|
|||
final List<Allele> alternateAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
|
||||
// initialize the data for this thread if that hasn't been done yet
|
||||
if ( glcm.get() == null ) {
|
||||
|
|
@ -313,7 +313,7 @@ public class UnifiedGenotyperEngine {
|
|||
return new VariantCallContext(vc, false);
|
||||
}
|
||||
|
||||
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model, final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model, final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
return calculateGenotypes(null, null, null, null, vc, model, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
|
|
@ -327,7 +327,7 @@ public class UnifiedGenotyperEngine {
|
|||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false,perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
|
|
@ -346,7 +346,7 @@ public class UnifiedGenotyperEngine {
|
|||
final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final boolean inheritAttributesFromInputVC,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
|
||||
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
||||
|
||||
|
|
@ -451,7 +451,7 @@ public class UnifiedGenotyperEngine {
|
|||
attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
|
||||
}
|
||||
|
||||
if ( !UAC.NO_SLOD && !limitedContext && !bestGuessIsRef ) {
|
||||
if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) {
|
||||
//final boolean DEBUG_SLOD = false;
|
||||
|
||||
// the overall lod
|
||||
|
|
|
|||
|
|
@ -45,7 +45,6 @@ public abstract class AFCalc implements Cloneable {
|
|||
|
||||
protected final int nSamples;
|
||||
protected final int maxAlternateAllelesToGenotype;
|
||||
protected final int maxAlternateAllelesForIndels;
|
||||
|
||||
protected Logger logger = defaultLogger;
|
||||
|
||||
|
|
@ -60,19 +59,16 @@ public abstract class AFCalc implements Cloneable {
|
|||
*
|
||||
* @param nSamples number of samples, must be > 0
|
||||
* @param maxAltAlleles maxAltAlleles for SNPs
|
||||
* @param maxAltAllelesForIndels for indels
|
||||
* @param ploidy the ploidy, must be > 0
|
||||
*/
|
||||
protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
||||
protected AFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
|
||||
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
|
||||
if ( maxAltAllelesForIndels < 1 ) throw new IllegalArgumentException("maxAltAllelesForIndels must be greater than zero " + maxAltAllelesForIndels);
|
||||
if ( ploidy < 1 ) throw new IllegalArgumentException("ploidy must be > 0 but got " + ploidy);
|
||||
|
||||
this.nSamples = nSamples;
|
||||
this.maxAlternateAllelesToGenotype = maxAltAlleles;
|
||||
this.maxAlternateAllelesForIndels = maxAltAllelesForIndels;
|
||||
this.stateTracker = new StateTracker(Math.max(maxAltAlleles, maxAltAllelesForIndels));
|
||||
this.stateTracker = new StateTracker(maxAltAlleles);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -191,7 +187,7 @@ public abstract class AFCalc implements Cloneable {
|
|||
// ---------------------------------------------------------------------------
|
||||
|
||||
public int getMaxAltAlleles() {
|
||||
return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels);
|
||||
return maxAlternateAllelesToGenotype;
|
||||
}
|
||||
|
||||
protected StateTracker getStateTracker() {
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ public class AFCalcFactory {
|
|||
public static AFCalc createAFCalc(final UnifiedArgumentCollection UAC,
|
||||
final int nSamples,
|
||||
final Logger logger) {
|
||||
final int maxAltAlleles = Math.max(UAC.MAX_ALTERNATE_ALLELES, UAC.MAX_ALTERNATE_ALLELES_FOR_INDELS);
|
||||
final int maxAltAlleles = UAC.MAX_ALTERNATE_ALLELES;
|
||||
if ( ! UAC.AFmodel.usableForParams(UAC.samplePloidy, maxAltAlleles) ) {
|
||||
logger.info("Requested ploidy " + UAC.samplePloidy + " maxAltAlleles " + maxAltAlleles + " not supported by requested model " + UAC.AFmodel + " looking for an option");
|
||||
final List<Calculation> supportingCalculations = new LinkedList<Calculation>();
|
||||
|
|
@ -109,7 +109,7 @@ public class AFCalcFactory {
|
|||
logger.info("Selecting model " + UAC.AFmodel);
|
||||
}
|
||||
|
||||
final AFCalc calc = createAFCalc(UAC.AFmodel, nSamples, UAC.MAX_ALTERNATE_ALLELES, UAC.MAX_ALTERNATE_ALLELES_FOR_INDELS, UAC.samplePloidy);
|
||||
final AFCalc calc = createAFCalc(UAC.AFmodel, nSamples, UAC.MAX_ALTERNATE_ALLELES, UAC.samplePloidy);
|
||||
|
||||
if ( logger != null ) calc.setLogger(logger);
|
||||
if ( UAC.exactCallsLog != null ) calc.enableProcessLog(UAC.exactCallsLog);
|
||||
|
|
@ -126,7 +126,7 @@ public class AFCalcFactory {
|
|||
* @return an initialized AFCalc
|
||||
*/
|
||||
public static AFCalc createAFCalc(final int nSamples) {
|
||||
return createAFCalc(chooseBestCalculation(nSamples, 2, 1), nSamples, 2, 2, 2);
|
||||
return createAFCalc(chooseBestCalculation(nSamples, 2, 1), nSamples, 2, 2);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -139,7 +139,7 @@ public class AFCalcFactory {
|
|||
* @return an initialized AFCalc
|
||||
*/
|
||||
public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles) {
|
||||
return createAFCalc(calc, nSamples, maxAltAlleles, maxAltAlleles, 2);
|
||||
return createAFCalc(calc, nSamples, maxAltAlleles, 2);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -147,14 +147,12 @@ public class AFCalcFactory {
|
|||
*
|
||||
* @param nSamples the number of samples we'll be using
|
||||
* @param maxAltAlleles the max. alt alleles to consider for SNPs
|
||||
* @param maxAltAllelesForIndels the max. alt alleles to consider for non-SNPs
|
||||
* @param ploidy the sample ploidy. Must be consistent with the calc
|
||||
*
|
||||
* @return an initialized AFCalc
|
||||
*/
|
||||
public static AFCalc createAFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
||||
final int maxAlt = Math.max(maxAltAlleles, maxAltAllelesForIndels);
|
||||
return createAFCalc(chooseBestCalculation(nSamples, ploidy, maxAlt), nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||
public static AFCalc createAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||
return createAFCalc(chooseBestCalculation(nSamples, ploidy, maxAltAlleles), nSamples, maxAltAlleles, ploidy);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -181,20 +179,17 @@ public class AFCalcFactory {
|
|||
* @param calc the calculation to use
|
||||
* @param nSamples the number of samples we'll be using
|
||||
* @param maxAltAlleles the max. alt alleles to consider for SNPs
|
||||
* @param maxAltAllelesForIndels the max. alt alleles to consider for non-SNPs
|
||||
* @param ploidy the sample ploidy. Must be consistent with the calc
|
||||
*
|
||||
* @return an initialized AFCalc
|
||||
*/
|
||||
public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
||||
public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||
if ( calc == null ) throw new IllegalArgumentException("Calculation cannot be null");
|
||||
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
|
||||
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
|
||||
if ( maxAltAllelesForIndels < 1 ) throw new IllegalArgumentException("maxAltAllelesForIndels must be greater than zero " + maxAltAllelesForIndels);
|
||||
if ( ploidy < 1 ) throw new IllegalArgumentException("sample ploidy must be greater than zero " + ploidy);
|
||||
|
||||
final int maxAlt = Math.max(maxAltAlleles, maxAltAllelesForIndels);
|
||||
if ( ! calc.usableForParams(ploidy, maxAlt) )
|
||||
if ( ! calc.usableForParams(ploidy, maxAltAlleles) )
|
||||
throw new IllegalArgumentException("AFCalc " + calc + " does not support requested ploidy " + ploidy);
|
||||
|
||||
final Class<? extends AFCalc> afClass = getClassByName(calc.className);
|
||||
|
|
@ -202,19 +197,19 @@ public class AFCalcFactory {
|
|||
throw new IllegalArgumentException("Unexpected AFCalc " + calc);
|
||||
|
||||
try {
|
||||
Object args[] = new Object[]{nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy};
|
||||
Constructor c = afClass.getDeclaredConstructor(int.class, int.class, int.class, int.class);
|
||||
Object args[] = new Object[]{nSamples, maxAltAlleles, ploidy};
|
||||
Constructor c = afClass.getDeclaredConstructor(int.class, int.class, int.class);
|
||||
return (AFCalc)c.newInstance(args);
|
||||
} catch (Exception e) {
|
||||
throw new ReviewedStingException("Could not instantiate AFCalc " + calc, e);
|
||||
}
|
||||
}
|
||||
|
||||
protected static List<AFCalc> createAFCalcs(final List<Calculation> calcs, final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
||||
protected static List<AFCalc> createAFCalcs(final List<Calculation> calcs, final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||
final List<AFCalc> AFCalcs = new LinkedList<AFCalc>();
|
||||
|
||||
for ( final Calculation calc : calcs )
|
||||
AFCalcs.add(createAFCalc(calc, nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy));
|
||||
AFCalcs.add(createAFCalc(calc, nSamples, maxAltAlleles, ploidy));
|
||||
|
||||
return AFCalcs;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,8 +31,8 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
import java.util.*;
|
||||
|
||||
public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
||||
public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||
public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, ploidy);
|
||||
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
|
||||
}
|
||||
|
||||
|
|
@ -75,16 +75,14 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
|
||||
@Override
|
||||
protected VariantContext reduceScope(final VariantContext vc) {
|
||||
final int myMaxAltAllelesToGenotype = vc.getType().equals(VariantContext.Type.INDEL) ? maxAlternateAllelesForIndels : maxAlternateAllelesToGenotype;
|
||||
|
||||
// don't try to genotype too many alternate alleles
|
||||
if ( vc.getAlternateAlleles().size() > myMaxAltAllelesToGenotype ) {
|
||||
logger.warn("this tool is currently set to genotype at most " + myMaxAltAllelesToGenotype + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||
if ( vc.getAlternateAlleles().size() > getMaxAltAlleles() ) {
|
||||
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
List<Allele> alleles = new ArrayList<Allele>(myMaxAltAllelesToGenotype + 1);
|
||||
List<Allele> alleles = new ArrayList<Allele>(getMaxAltAlleles() + 1);
|
||||
alleles.add(vc.getReference());
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, myMaxAltAllelesToGenotype));
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles()));
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||
return builder.make();
|
||||
|
|
|
|||
|
|
@ -39,8 +39,8 @@ import java.util.ArrayList;
|
|||
abstract class ExactAFCalc extends AFCalc {
|
||||
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||
|
||||
protected ExactAFCalc(final int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||
protected ExactAFCalc(final int nSamples, int maxAltAlleles, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, ploidy);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ import java.util.*;
|
|||
/**
|
||||
* The min. confidence of an allele to be included in the joint posterior.
|
||||
*/
|
||||
private final static double MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR = Math.log10(1e-20);
|
||||
private final static double MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR = Math.log10(1e-10);
|
||||
|
||||
private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0};
|
||||
private final static List<Allele> BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
|
@ -111,9 +111,9 @@ import java.util.*;
|
|||
*/
|
||||
final AFCalc biAlleleExactModel;
|
||||
|
||||
protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||
biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy);
|
||||
protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, ploidy);
|
||||
biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, ploidy);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -329,6 +329,7 @@ import java.util.*;
|
|||
double log10PosteriorOfACEq0Sum = 0.0;
|
||||
double log10PosteriorOfACGt0Sum = 0.0;
|
||||
|
||||
boolean anyPoly = false;
|
||||
for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) {
|
||||
final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1);
|
||||
final int altI = vc.getAlleles().indexOf(altAllele) - 1;
|
||||
|
|
@ -336,12 +337,14 @@ import java.util.*;
|
|||
// MLE of altI allele is simply the MLE of this allele in altAlleles
|
||||
alleleCountsOfMLE[altI] = sortedResultWithThetaNPriors.getAlleleCountAtMLE(altAllele);
|
||||
|
||||
log10PriorsOfAC[0] += sortedResultWithThetaNPriors.getLog10PriorOfAFEq0();
|
||||
log10PriorsOfAC[1] += sortedResultWithThetaNPriors.getLog10PriorOfAFGT0();
|
||||
|
||||
// the AF > 0 case requires us to store the normalized likelihood for later summation
|
||||
if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR )
|
||||
if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR ) {
|
||||
anyPoly = true;
|
||||
log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0();
|
||||
log10PriorsOfAC[0] += sortedResultWithThetaNPriors.getLog10PriorOfAFEq0();
|
||||
log10PriorsOfAC[1] += sortedResultWithThetaNPriors.getLog10PriorOfAFGT0();
|
||||
}
|
||||
|
||||
log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
|
||||
|
||||
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
|
||||
|
|
@ -351,6 +354,12 @@ import java.util.*;
|
|||
nEvaluations += sortedResultWithThetaNPriors.nEvaluations;
|
||||
}
|
||||
|
||||
// If no alleles were polymorphic, make sure we have the proper priors (the defaults) for likelihood calculation
|
||||
if ( ! anyPoly ) {
|
||||
log10PriorsOfAC[0] = sortedResultsWithThetaNPriors.get(0).getLog10PriorOfAFEq0();
|
||||
log10PriorsOfAC[1] = sortedResultsWithThetaNPriors.get(0).getLog10PriorOfAFGT0();
|
||||
}
|
||||
|
||||
// In principle, if B_p = x and C_p = y are the probabilities of being poly for alleles B and C,
|
||||
// the probability of being poly is (1 - B_p) * (1 - C_p) = (1 - x) * (1 - y). We want to estimate confidently
|
||||
// log10((1 - x) * (1 - y)) which is log10(1 - x) + log10(1 - y). This sum is log10PosteriorOfACEq0
|
||||
|
|
|
|||
|
|
@ -13,8 +13,8 @@ import java.util.Map;
|
|||
* Original bi-allelic ~O(N) implementation. Kept here for posterity and reference
|
||||
*/
|
||||
class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||
protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||
protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, ploidy);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||
protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
||||
protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, ploidy);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
|
|
@ -41,8 +41,8 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -188,11 +188,14 @@ public class PairHMMIndelErrorModel {
|
|||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||
final ReferenceContext ref,
|
||||
final int eventLength,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap,
|
||||
final double downsamplingFraction,
|
||||
final PrintStream downsamplingLog) {
|
||||
final int numHaplotypes = haplotypeMap.size();
|
||||
|
||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||
final double[][] readLikelihoods = computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap, readCounts);
|
||||
perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction, downsamplingLog);
|
||||
return getDiploidHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,173 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of
|
||||
* the reduced bam are above sufficient threshold)
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The original and reduced BAM files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -I:original original.bam \
|
||||
* -I:reduced reduced.bam \
|
||||
* -R ref.fasta \
|
||||
* -T AssessReducedQuals \
|
||||
* -o output.intervals
|
||||
* </pre>
|
||||
*
|
||||
* @author ami
|
||||
*/
|
||||
|
||||
public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
|
||||
|
||||
private static final String reduced = "reduced";
|
||||
private static final String original = "original";
|
||||
private static final int originalQualsIndex = 0;
|
||||
private static final int reducedQualsIndex = 1;
|
||||
|
||||
@Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
|
||||
public int sufficientQualSum = 600;
|
||||
|
||||
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
|
||||
public int qual_epsilon = 0;
|
||||
|
||||
@Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") // TODO -- best to make this optional
|
||||
public int debugLevel = 0; // TODO -- best to make this an enum or boolean
|
||||
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
public void initialize() {
|
||||
if (debugLevel != 0)
|
||||
out.println(" Debug mode" +
|
||||
"Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
|
||||
"Debug:\tqual_epsilon: "+qual_epsilon);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
@Override
|
||||
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
boolean reportLocus;
|
||||
final int[] quals = getPileupQuals(context.getBasePileup());
|
||||
if (debugLevel != 0)
|
||||
out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
|
||||
final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
|
||||
final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
|
||||
final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
|
||||
final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
|
||||
reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
|
||||
if(debugLevel != 0 && reportLocus)
|
||||
out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
|
||||
|
||||
return reportLocus ? ref.getLocus() : null;
|
||||
}
|
||||
|
||||
private final int[] getPileupQuals(final ReadBackedPileup readPileup) {
|
||||
|
||||
final int[] quals = new int[2];
|
||||
String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
|
||||
"Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
|
||||
|
||||
for( PileupElement p : readPileup ){
|
||||
final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
|
||||
if ( isGoodRead(p,tags) ){
|
||||
final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
|
||||
final int tagIndex = getTagIndex(tags);
|
||||
quals[tagIndex] += tempQual;
|
||||
if(debugLevel == 2)
|
||||
printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
|
||||
}
|
||||
}
|
||||
if(debugLevel == 2){
|
||||
out.println(printPileup[originalQualsIndex]);
|
||||
out.println(printPileup[reducedQualsIndex]);
|
||||
}
|
||||
return quals;
|
||||
}
|
||||
|
||||
// TODO -- arguments/variables should be final, not method declaration
|
||||
private final boolean isGoodRead(PileupElement p, List<String> tags){
|
||||
// TODO -- this isn't quite right. You don't need the tags here. Instead, you want to check whether the read itself (which
|
||||
// TODO -- you can get from the PileupElement) is a reduced read (not all reads from the reduced bam are reduced) and only
|
||||
// TODO -- for them do you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff).
|
||||
return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
|
||||
}
|
||||
|
||||
private final int getTagIndex(List<String> tags){
|
||||
return tags.contains(reduced) ? 1 : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(GenomeLoc sum) {
|
||||
if ( sum != null )
|
||||
out.println(sum);
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
|
||||
if ( lhs == null )
|
||||
return rhs;
|
||||
|
||||
if ( rhs == null )
|
||||
return lhs;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( lhs.contiguousP(rhs) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
|
||||
|
||||
// otherwise, print the lhs and start over with the rhs
|
||||
out.println(lhs);
|
||||
return rhs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
|
||||
if ( value == null )
|
||||
return sum;
|
||||
|
||||
if ( sum == null )
|
||||
return value;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( sum.contiguousP(value) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
|
||||
|
||||
// otherwise, print the sum and start over with the value
|
||||
out.println(sum);
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
|
@ -245,24 +245,21 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
@Argument(fullName="condition_on_depth", shortName="depth", doc="Condition validation on a minimum depth of coverage by the reads", required=false)
|
||||
private int minDepth = -1;
|
||||
|
||||
/**
|
||||
* If your VCF or BAM file has more than one sample and you only want to validate one, use this parameter to choose it.
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName ="sample", shortName ="sn", doc="Name of the sample to validate (in case your VCF/BAM has more than one sample)", required=false)
|
||||
private String sample = "";
|
||||
|
||||
/**
|
||||
/**
|
||||
* Print out discordance sites to standard out.
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName ="print_interesting_sites", shortName ="print_interesting", doc="Print out interesting sites to standard out", required=false)
|
||||
private boolean printInterestingSites;
|
||||
private boolean printInterestingSites = false;
|
||||
|
||||
private UnifiedGenotyperEngine snpEngine;
|
||||
private UnifiedGenotyperEngine indelEngine;
|
||||
private Set<String> samples;
|
||||
|
||||
private enum GVstatus {
|
||||
T, F, NONE
|
||||
}
|
||||
|
||||
public static class CountedData {
|
||||
private long nAltCalledAlt = 0L;
|
||||
private long nAltCalledRef = 0L;
|
||||
|
|
@ -336,8 +333,9 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
snpEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
|
||||
|
||||
// Adding the INDEL calling arguments for UG
|
||||
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
indelEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
|
||||
UnifiedArgumentCollection uac_indel = new UnifiedArgumentCollection(uac);
|
||||
uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
indelEngine = new UnifiedGenotyperEngine(getToolkit(), uac_indel);
|
||||
|
||||
// make sure we have callConf set to the threshold set by the UAC so we can use it later.
|
||||
callConf = uac.STANDARD_CONFIDENCE_FOR_CALLING;
|
||||
|
|
@ -368,9 +366,10 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
// Do not operate on variants that are not covered to the optional minimum depth
|
||||
if (!context.hasReads() || (minDepth > 0 && context.getBasePileup().getBases().length < minDepth)) {
|
||||
counter.nUncovered = 1L;
|
||||
if (vcComp.getAttribute("GV").equals("T"))
|
||||
final GVstatus status = getGVstatus(vcComp);
|
||||
if ( status == GVstatus.T )
|
||||
counter.nAltNotCalled = 1L;
|
||||
else if (vcComp.getAttribute("GV").equals("F"))
|
||||
else if ( status == GVstatus.F )
|
||||
counter.nRefNotCalled = 1L;
|
||||
else
|
||||
counter.nNoStatusNotCalled = 1L;
|
||||
|
|
@ -427,10 +426,11 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
// if (!vcComp.hasExtendedAttribute("GV"))
|
||||
// throw new UserException.BadInput("Variant has no GV annotation in the INFO field. " + vcComp.getChr() + ":" + vcComp.getStart());
|
||||
|
||||
final GVstatus status = getGVstatus(vcComp);
|
||||
if (call.isCalledAlt(callConf)) {
|
||||
if (vcComp.getAttribute("GV").equals("T"))
|
||||
if ( status == GVstatus.T )
|
||||
counter.nAltCalledAlt = 1L;
|
||||
else if (vcComp.getAttribute("GV").equals("F")) {
|
||||
else if ( status == GVstatus.F ) {
|
||||
counter.nRefCalledAlt = 1L;
|
||||
if ( printInterestingSites )
|
||||
System.out.println("Truth=REF Call=ALT at " + call.getChr() + ":" + call.getStart());
|
||||
|
|
@ -439,12 +439,12 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
counter.nNoStatusCalledAlt = 1L;
|
||||
}
|
||||
else if (call.isCalledRef(callConf)) {
|
||||
if (vcComp.getAttribute("GV").equals("T")) {
|
||||
if ( status == GVstatus.T ) {
|
||||
counter.nAltCalledRef = 1L;
|
||||
if ( printInterestingSites )
|
||||
System.out.println("Truth=ALT Call=REF at " + call.getChr() + ":" + call.getStart());
|
||||
}
|
||||
else if (vcComp.getAttribute("GV").equals("F"))
|
||||
else if ( status == GVstatus.F )
|
||||
counter.nRefCalledRef = 1L;
|
||||
|
||||
else
|
||||
|
|
@ -452,9 +452,9 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
}
|
||||
else {
|
||||
counter.nNotConfidentCalls = 1L;
|
||||
if (vcComp.getAttribute("GV").equals("T"))
|
||||
if ( status == GVstatus.T )
|
||||
counter.nAltNotCalled = 1L;
|
||||
else if (vcComp.getAttribute("GV").equals("F"))
|
||||
else if ( status == GVstatus.F )
|
||||
counter.nRefNotCalled = 1L;
|
||||
else
|
||||
counter.nNoStatusNotCalled = 1L;
|
||||
|
|
@ -475,6 +475,10 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
return counter;
|
||||
}
|
||||
|
||||
private GVstatus getGVstatus(final VariantContext vc) {
|
||||
return ( !vc.hasAttribute("GV") ) ? GVstatus.NONE : (vc.getAttribute("GV").equals("T") ? GVstatus.T : GVstatus.F);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// reduce
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ public class GLBasedSampleSelector extends SampleSelector {
|
|||
// do we want to apply a prior? maybe user-spec?
|
||||
if ( flatPriors == null ) {
|
||||
flatPriors = new double[1+2*samples.size()];
|
||||
AFCalculator = AFCalcFactory.createAFCalc(samples.size(), 4, 4, 2);
|
||||
AFCalculator = AFCalcFactory.createAFCalc(samples.size(), 4, 2);
|
||||
}
|
||||
final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors);
|
||||
// do we want to let this qual go up or down?
|
||||
|
|
|
|||
|
|
@ -459,7 +459,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH;
|
||||
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
|
||||
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
UAC.NO_SLOD = true;
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -346,7 +346,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
|
||||
@Override
|
||||
protected String getFreezeFields() {
|
||||
return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%n");
|
||||
return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%nif (num_cpu_threads_per_data_thread.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1))%n");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,12 +4,21 @@ package org.broadinstitute.sting.utils;
|
|||
* Simple utility class that makes it convenient to print unit adjusted times
|
||||
*/
|
||||
public class AutoFormattingTime {
|
||||
double timeInSeconds; // in Seconds
|
||||
int precision; // for format
|
||||
private final int width; // for format
|
||||
private final int precision; // for format
|
||||
|
||||
public AutoFormattingTime(double timeInSeconds, int precision) {
|
||||
double timeInSeconds; // in Seconds
|
||||
private final String formatString;
|
||||
|
||||
public AutoFormattingTime(double timeInSeconds, final int width, int precision) {
|
||||
this.width = width;
|
||||
this.timeInSeconds = timeInSeconds;
|
||||
this.precision = precision;
|
||||
this.formatString = "%" + width + "." + precision + "f %s";
|
||||
}
|
||||
|
||||
public AutoFormattingTime(double timeInSeconds, int precision) {
|
||||
this(timeInSeconds, 6, precision);
|
||||
}
|
||||
|
||||
public AutoFormattingTime(double timeInSeconds) {
|
||||
|
|
@ -20,6 +29,20 @@ public class AutoFormattingTime {
|
|||
return timeInSeconds;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the precision (a la format's %WIDTH.PERCISIONf)
|
||||
*/
|
||||
public int getWidth() {
|
||||
return width;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the precision (a la format's %WIDTH.PERCISIONf)
|
||||
*/
|
||||
public int getPrecision() {
|
||||
return precision;
|
||||
}
|
||||
|
||||
/**
|
||||
* Instead of 10000 s, returns 2.8 hours
|
||||
* @return
|
||||
|
|
@ -48,6 +71,6 @@ public class AutoFormattingTime {
|
|||
}
|
||||
}
|
||||
|
||||
return String.format("%6."+precision+"f %s", unitTime, unit);
|
||||
return String.format(formatString, unitTime, unit);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,9 +99,7 @@ public class LoggingNestedIntegerArray<T> extends NestedIntegerArray<T> {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void put( final T value, final int... keys ) {
|
||||
super.put(value, keys);
|
||||
|
||||
public boolean put( final T value, final int... keys ) {
|
||||
StringBuilder logEntry = new StringBuilder();
|
||||
|
||||
logEntry.append(logEntryLabel);
|
||||
|
|
@ -116,5 +114,7 @@ public class LoggingNestedIntegerArray<T> extends NestedIntegerArray<T> {
|
|||
|
||||
// PrintStream methods all use synchronized blocks internally, so our logging is thread-safe
|
||||
log.println(logEntry.toString());
|
||||
|
||||
return super.put(value, keys);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,9 +25,11 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.collections;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
|
@ -38,18 +40,51 @@ import java.util.List;
|
|||
|
||||
public class NestedIntegerArray<T> {
|
||||
|
||||
private static Logger logger = Logger.getLogger(NestedIntegerArray.class);
|
||||
|
||||
protected final Object[] data;
|
||||
|
||||
protected final int numDimensions;
|
||||
protected final int[] dimensions;
|
||||
|
||||
// Preallocate the first two dimensions to limit contention during tree traversals in put()
|
||||
private static final int NUM_DIMENSIONS_TO_PREALLOCATE = 2;
|
||||
|
||||
public NestedIntegerArray(final int... dimensions) {
|
||||
numDimensions = dimensions.length;
|
||||
if ( numDimensions == 0 )
|
||||
throw new ReviewedStingException("There must be at least one dimension to an NestedIntegerArray");
|
||||
this.dimensions = dimensions.clone();
|
||||
|
||||
int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE);
|
||||
|
||||
logger.info(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions)));
|
||||
logger.info(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate));
|
||||
|
||||
data = new Object[dimensions[0]];
|
||||
preallocateArray(data, 0, dimensionsToPreallocate);
|
||||
|
||||
logger.info(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate));
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively allocate the first dimensionsToPreallocate dimensions of the tree
|
||||
*
|
||||
* Pre-allocating the first few dimensions helps limit contention during tree traversals in put()
|
||||
*
|
||||
* @param subarray current node in the tree
|
||||
* @param dimension current level in the tree
|
||||
* @param dimensionsToPreallocate preallocate only this many dimensions (starting from the first)
|
||||
*/
|
||||
private void preallocateArray( Object[] subarray, int dimension, int dimensionsToPreallocate ) {
|
||||
if ( dimension >= dimensionsToPreallocate - 1 ) {
|
||||
return;
|
||||
}
|
||||
|
||||
for ( int i = 0; i < subarray.length; i++ ) {
|
||||
subarray[i] = new Object[dimensions[dimension + 1]];
|
||||
preallocateArray((Object[])subarray[i], dimension + 1, dimensionsToPreallocate);
|
||||
}
|
||||
}
|
||||
|
||||
public T get(final int... keys) {
|
||||
|
|
@ -59,14 +94,30 @@ public class NestedIntegerArray<T> {
|
|||
for( int i = 0; i < numNestedDimensions; i++ ) {
|
||||
if ( keys[i] >= dimensions[i] )
|
||||
return null;
|
||||
|
||||
myData = (Object[])myData[keys[i]];
|
||||
if ( myData == null )
|
||||
return null;
|
||||
}
|
||||
|
||||
return (T)myData[keys[numNestedDimensions]];
|
||||
}
|
||||
|
||||
public synchronized void put(final T value, final int... keys) { // WARNING! value comes before the keys!
|
||||
/**
|
||||
* Insert a value at the position specified by the given keys.
|
||||
*
|
||||
* This method is thread-safe, however the caller MUST check the
|
||||
* return value to see if the put succeeded. This method RETURNS FALSE if
|
||||
* the value could not be inserted because there already was a value present
|
||||
* at the specified location. In this case the caller should do a get() to get
|
||||
* the already-existing value and (potentially) update it.
|
||||
*
|
||||
* @param value value to insert
|
||||
* @param keys keys specifying the location of the value in the tree
|
||||
* @return true if the value was inserted, false if it could not be inserted because there was already
|
||||
* a value at the specified position
|
||||
*/
|
||||
public boolean put(final T value, final int... keys) { // WARNING! value comes before the keys!
|
||||
if ( keys.length != numDimensions )
|
||||
throw new ReviewedStingException("Exactly " + numDimensions + " keys should be passed to this NestedIntegerArray but " + keys.length + " were provided");
|
||||
|
||||
|
|
@ -75,15 +126,35 @@ public class NestedIntegerArray<T> {
|
|||
for ( int i = 0; i < numNestedDimensions; i++ ) {
|
||||
if ( keys[i] >= dimensions[i] )
|
||||
throw new ReviewedStingException("Key " + keys[i] + " is too large for dimension " + i + " (max is " + (dimensions[i]-1) + ")");
|
||||
Object[] temp = (Object[])myData[keys[i]];
|
||||
if ( temp == null ) {
|
||||
temp = new Object[dimensions[i+1]];
|
||||
myData[keys[i]] = temp;
|
||||
|
||||
// If we're at or beyond the last dimension that was pre-allocated, we need to do a synchronized
|
||||
// check to see if the next branch exists, and if it doesn't, create it
|
||||
if ( i >= NUM_DIMENSIONS_TO_PREALLOCATE - 1 ) {
|
||||
synchronized ( myData ) {
|
||||
if ( myData[keys[i]] == null ) {
|
||||
myData[keys[i]] = new Object[dimensions[i + 1]];
|
||||
}
|
||||
}
|
||||
}
|
||||
myData = temp;
|
||||
|
||||
myData = (Object[])myData[keys[i]];
|
||||
}
|
||||
|
||||
myData[keys[numNestedDimensions]] = value;
|
||||
synchronized ( myData ) { // lock the bottom row while we examine and (potentially) update it
|
||||
|
||||
// Insert the new value only if there still isn't any existing value in this position
|
||||
if ( myData[keys[numNestedDimensions]] == null ) {
|
||||
myData[keys[numNestedDimensions]] = value;
|
||||
}
|
||||
else {
|
||||
// Already have a value for this leaf (perhaps another thread came along and inserted one
|
||||
// while we traversed the tree), so return false to notify the caller that we didn't put
|
||||
// the item
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public List<T> getAllValues() {
|
||||
|
|
|
|||
|
|
@ -352,6 +352,9 @@ public class UserException extends ReviewedStingException {
|
|||
}
|
||||
|
||||
public static class CannotExecuteQScript extends UserException {
|
||||
public CannotExecuteQScript(String message) {
|
||||
super(String.format("Unable to execute QScript: " + message));
|
||||
}
|
||||
public CannotExecuteQScript(String message, Exception e) {
|
||||
super(String.format("Unable to execute QScript: " + message), e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,26 +22,29 @@
|
|||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
package org.broadinstitute.sting.utils.genotyper;
|
||||
|
||||
|
||||
//import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.*;
|
||||
|
||||
public class PerReadAlleleLikelihoodMap {
|
||||
public abstract class PerReadAlleleLikelihoodMap {
|
||||
|
||||
public static final double INDEL_LIKELIHOOD_THRESH = 0.1;
|
||||
|
||||
private List<Allele> alleles;
|
||||
private Map<GATKSAMRecord,Map<Allele,Double>> likelihoodReadMap;
|
||||
public PerReadAlleleLikelihoodMap() {
|
||||
likelihoodReadMap = new LinkedHashMap<GATKSAMRecord,Map<Allele,Double>>();
|
||||
alleles = new ArrayList<Allele>();
|
||||
}
|
||||
protected List<Allele> alleles;
|
||||
protected Map<GATKSAMRecord,Map<Allele,Double>> likelihoodReadMap;
|
||||
|
||||
public abstract void performPerAlleleDownsampling(final double downsamplingFraction, final PrintStream log);
|
||||
public abstract ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log);
|
||||
|
||||
public void add(GATKSAMRecord read, Allele a, Double likelihood) {
|
||||
Map<Allele,Double> likelihoodMap;
|
||||
|
|
@ -95,16 +98,6 @@ public class PerReadAlleleLikelihoodMap {
|
|||
public int getNumberOfStoredElements() {
|
||||
return likelihoodReadMap.size();
|
||||
}
|
||||
/**
|
||||
* Returns list of reads greedily associated with a particular allele.
|
||||
* Needs to loop for each read, and assign to each allele
|
||||
* @param a Desired allele
|
||||
* @return
|
||||
*/
|
||||
@Requires("a!=null")
|
||||
public List<GATKSAMRecord> getReadsAssociatedWithAllele(Allele a) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Map<Allele,Double> getLikelihoodsAssociatedWithPileupElement(PileupElement p) {
|
||||
if (!likelihoodReadMap.containsKey(p.getRead()))
|
||||
|
|
@ -129,4 +122,16 @@ public class PerReadAlleleLikelihoodMap {
|
|||
}
|
||||
return (maxLike - prevMaxLike > INDEL_LIKELIHOOD_THRESH ? mostLikelyAllele : Allele.NO_CALL );
|
||||
}
|
||||
}
|
||||
|
||||
public static PerReadAlleleLikelihoodMap getBestAvailablePerReadAlleleLikelihoodMap() {
|
||||
final Class PerReadAlleleLikelihoodMapClass = GATKLiteUtils.getProtectedClassIfAvailable(PerReadAlleleLikelihoodMap.class);
|
||||
try {
|
||||
Constructor constructor = PerReadAlleleLikelihoodMapClass.getDeclaredConstructor((Class[])null);
|
||||
constructor.setAccessible(true);
|
||||
return (PerReadAlleleLikelihoodMap)constructor.newInstance();
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new ReviewedStingException("Unable to create RecalibrationEngine class instance " + PerReadAlleleLikelihoodMapClass.getSimpleName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.utils.genotyper;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class StandardPerReadAlleleLikelihoodMap extends PerReadAlleleLikelihoodMap implements PublicPackageSource {
|
||||
|
||||
public StandardPerReadAlleleLikelihoodMap() {
|
||||
likelihoodReadMap = new LinkedHashMap<GATKSAMRecord,Map<Allele,Double>>();
|
||||
alleles = new ArrayList<Allele>();
|
||||
}
|
||||
|
||||
// not implemented in the standard version
|
||||
public void performPerAlleleDownsampling(final double downsamplingFraction, final PrintStream log) {}
|
||||
public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) { return pileup; }
|
||||
}
|
||||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.progressmeter;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
|
@ -200,6 +201,14 @@ public class ProgressMeter {
|
|||
"Location", processingUnitName, processingUnitName));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the current runtime in nanoseconds
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public long getRuntimeInNanoseconds() {
|
||||
return timer.getElapsedTimeNano();
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility routine that prints out process information (including timing) every N records or
|
||||
* every M seconds, for N and M set in global variables.
|
||||
|
|
|
|||
|
|
@ -61,17 +61,6 @@ public class BaseRecalibration {
|
|||
// qualityScoreByFullCovariateKey[i] = new NestedHashMap();
|
||||
// }
|
||||
|
||||
/**
|
||||
* Thread local cache to allow multi-threaded use of this class
|
||||
*/
|
||||
private ThreadLocal<ReadCovariates> readCovariatesCache;
|
||||
{
|
||||
readCovariatesCache = new ThreadLocal<ReadCovariates> () {
|
||||
@Override protected ReadCovariates initialValue() {
|
||||
return new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor using a GATK Report file
|
||||
|
|
@ -113,13 +102,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
}
|
||||
|
||||
// Compute all covariates for the read
|
||||
// TODO -- the need to clear here suggests there's an error in the indexing / assumption code
|
||||
// TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads?
|
||||
// TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here
|
||||
// TODO -- needs to be fixed.
|
||||
final ReadCovariates readCovariates = readCovariatesCache.get().clear();
|
||||
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
|
||||
final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, requestedCovariates);
|
||||
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||
|
|
|
|||
|
|
@ -66,7 +66,9 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final int key) {
|
||||
public synchronized String formatKey(final int key) {
|
||||
// This method is synchronized so that we don't attempt to do a get()
|
||||
// from the reverse lookup table while that table is being updated
|
||||
return readGroupReverseLookupTable.get(key);
|
||||
}
|
||||
|
||||
|
|
@ -76,17 +78,32 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
private int keyForReadGroup(final String readGroupId) {
|
||||
if (!readGroupLookupTable.containsKey(readGroupId)) {
|
||||
readGroupLookupTable.put(readGroupId, nextId);
|
||||
readGroupReverseLookupTable.put(nextId, readGroupId);
|
||||
nextId++;
|
||||
// Rather than synchronize this entire method (which would be VERY expensive for walkers like the BQSR),
|
||||
// synchronize only the table updates.
|
||||
|
||||
// Before entering the synchronized block, check to see if this read group is not in our tables.
|
||||
// If it's not, either we will have to insert it, OR another thread will insert it first.
|
||||
// This preliminary check avoids doing any synchronization most of the time.
|
||||
if ( ! readGroupLookupTable.containsKey(readGroupId) ) {
|
||||
|
||||
synchronized ( this ) {
|
||||
|
||||
// Now we need to make sure the key is STILL not there, since another thread may have come along
|
||||
// and inserted it while we were waiting to enter this synchronized block!
|
||||
if ( ! readGroupLookupTable.containsKey(readGroupId) ) {
|
||||
readGroupLookupTable.put(readGroupId, nextId);
|
||||
readGroupReverseLookupTable.put(nextId, readGroupId);
|
||||
nextId++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return readGroupLookupTable.get(readGroupId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maximumKeyValue() {
|
||||
public synchronized int maximumKeyValue() {
|
||||
// Synchronized so that we don't query table size while the tables are being updated
|
||||
return readGroupLookupTable.size() - 1;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -60,8 +60,9 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
private String mReadString = null;
|
||||
private GATKSAMReadGroupRecord mReadGroup = null;
|
||||
private byte[] reducedReadCounts = null;
|
||||
private int softStart = -1;
|
||||
private int softEnd = -1;
|
||||
private final static int UNINITIALIZED = -1;
|
||||
private int softStart = UNINITIALIZED;
|
||||
private int softEnd = UNINITIALIZED;
|
||||
|
||||
// because some values can be null, we don't want to duplicate effort
|
||||
private boolean retrievedReadGroup = false;
|
||||
|
|
@ -386,7 +387,7 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
* @return the unclipped start of the read taking soft clips (but not hard clips) into account
|
||||
*/
|
||||
public int getSoftStart() {
|
||||
if (softStart < 0) {
|
||||
if ( softStart == UNINITIALIZED ) {
|
||||
softStart = getAlignmentStart();
|
||||
for (final CigarElement cig : getCigar().getCigarElements()) {
|
||||
final CigarOperator op = cig.getOperator();
|
||||
|
|
@ -408,17 +409,23 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
* @return the unclipped end of the read taking soft clips (but not hard clips) into account
|
||||
*/
|
||||
public int getSoftEnd() {
|
||||
if ( softEnd < 0 ) {
|
||||
if ( softEnd == UNINITIALIZED ) {
|
||||
boolean foundAlignedBase = false;
|
||||
softEnd = getAlignmentEnd();
|
||||
final List<CigarElement> cigs = getCigar().getCigarElements();
|
||||
for (int i=cigs.size() - 1; i>=0; --i) {
|
||||
for (int i = cigs.size() - 1; i >= 0; --i) {
|
||||
final CigarElement cig = cigs.get(i);
|
||||
final CigarOperator op = cig.getOperator();
|
||||
|
||||
if (op == CigarOperator.SOFT_CLIP)
|
||||
if (op == CigarOperator.SOFT_CLIP) // assumes the soft clip that we found is at the end of the aligned read
|
||||
softEnd += cig.getLength();
|
||||
else if (op != CigarOperator.HARD_CLIP)
|
||||
else if (op != CigarOperator.HARD_CLIP) {
|
||||
foundAlignedBase = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( !foundAlignedBase ) { // for example 64H14S, the soft end is actually the same as the alignment end
|
||||
softEnd = getAlignmentEnd();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.threading;
|
||||
|
||||
import java.lang.reflect.Array;
|
||||
|
||||
/**
|
||||
* ThreadLocal implementation for arrays
|
||||
*
|
||||
* Example usage:
|
||||
*
|
||||
* private ThreadLocal<byte[]> threadLocalByteArray = new ThreadLocalArray<byte[]>(length, byte.class);
|
||||
* ....
|
||||
* byte[] byteArray = threadLocalByteArray.get();
|
||||
*
|
||||
* @param <T> the type of the array itself (eg., int[], double[], etc.)
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class ThreadLocalArray<T> extends ThreadLocal<T> {
|
||||
private int arraySize;
|
||||
private Class arrayElementType;
|
||||
|
||||
/**
|
||||
* Create a new ThreadLocalArray
|
||||
*
|
||||
* @param arraySize desired length of the array
|
||||
* @param arrayElementType type of the elements within the array (eg., Byte.class, Integer.class, etc.)
|
||||
*/
|
||||
public ThreadLocalArray( int arraySize, Class arrayElementType ) {
|
||||
super();
|
||||
|
||||
this.arraySize = arraySize;
|
||||
this.arrayElementType = arrayElementType;
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
protected T initialValue() {
|
||||
return (T)Array.newInstance(arrayElementType, arraySize);
|
||||
}
|
||||
}
|
||||
|
|
@ -193,6 +193,8 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
|||
infoMap.put("isHet", g.isHet() ? "1" : "0");
|
||||
infoMap.put("isHomVar", g.isHomVar() ? "1" : "0");
|
||||
infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getGQ());
|
||||
if ( g.hasDP() )
|
||||
infoMap.put(VCFConstants.DEPTH_KEY, g.getDP());
|
||||
for ( Map.Entry<String, Object> e : g.getExtendedAttributes().entrySet() ) {
|
||||
if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) )
|
||||
infoMap.put(e.getKey(), e.getValue());
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue