Merge branch 'master' of ssh://gsa3/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
92fa7e953a
|
|
@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
|
|
@ -118,20 +120,26 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
|
||||
@DataProvider(name = "PRTest")
|
||||
public Object[][] createPRTestData() {
|
||||
return new Object[][]{
|
||||
{new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")},
|
||||
{new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")},
|
||||
{new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")},
|
||||
{new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")}
|
||||
};
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{1, new PRTest(" -qq -1", "a1d87da5dcbde35170d6ba6bc3ee2812")});
|
||||
tests.add(new Object[]{1, new PRTest(" -qq 6", "a0fecae6d0e5ab9917862fa306186d10")});
|
||||
tests.add(new Object[]{1, new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")});
|
||||
|
||||
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
||||
tests.add(new Object[]{nct, new PRTest("", "d1bbb4ce6aa93e866f106f8b11d888ed")});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PRTest")
|
||||
public void testPR(PRTest params) {
|
||||
public void testPR(final int nct, PRTest params) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T PrintReads" +
|
||||
" -R " + hg18Reference +
|
||||
" -I " + privateTestDir + "HiSeq.1mb.1RG.bam" +
|
||||
" -nct " + nct +
|
||||
" -BQSR " + privateTestDir + "HiSeq.20mb.1RG.table" +
|
||||
params.args +
|
||||
" -o %s",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,169 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of
|
||||
* the reduced bam are above sufficient threshold)
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The original and reduced BAM files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -I:original original.bam \
|
||||
* -I:reduced reduced.bam \
|
||||
* -R ref.fasta \
|
||||
* -T AssessReducedQuals \
|
||||
* -o output.intervals
|
||||
* </pre>
|
||||
*
|
||||
* @author ami
|
||||
*/
|
||||
|
||||
public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
|
||||
|
||||
private static final String reduced = "reduced";
|
||||
private static final String original = "original";
|
||||
private static final int originalQualsIndex = 0;
|
||||
private static final int reducedQualsIndex = 1;
|
||||
|
||||
@Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
|
||||
public int sufficientQualSum = 600;
|
||||
|
||||
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
|
||||
public int qual_epsilon = 0;
|
||||
|
||||
@Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on")
|
||||
public int debugLevel = 0;
|
||||
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
public void initialize() {
|
||||
if (debugLevel != 0)
|
||||
out.println(" Debug mode" +
|
||||
"Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
|
||||
"Debug:\tqual_epsilon: "+qual_epsilon);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
@Override
|
||||
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
boolean reportLocus;
|
||||
final int[] quals = getPileupQuals(context.getBasePileup());
|
||||
if (debugLevel != 0)
|
||||
out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
|
||||
final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
|
||||
final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
|
||||
final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
|
||||
final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
|
||||
reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
|
||||
if(debugLevel != 0 && reportLocus)
|
||||
out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
|
||||
|
||||
return reportLocus ? ref.getLocus() : null;
|
||||
}
|
||||
|
||||
private final int[] getPileupQuals(final ReadBackedPileup readPileup) {
|
||||
|
||||
final int[] quals = new int[2];
|
||||
String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
|
||||
"Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
|
||||
|
||||
for( PileupElement p : readPileup ){
|
||||
final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
|
||||
if ( isGoodRead(p,tags) ){
|
||||
final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
|
||||
final int tagIndex = getTagIndex(tags);
|
||||
quals[tagIndex] += tempQual;
|
||||
if(debugLevel == 2)
|
||||
printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
|
||||
}
|
||||
}
|
||||
if(debugLevel == 2){
|
||||
out.println(printPileup[originalQualsIndex]);
|
||||
out.println(printPileup[reducedQualsIndex]);
|
||||
}
|
||||
return quals;
|
||||
}
|
||||
|
||||
private final boolean isGoodRead(PileupElement p, List<String> tags){
|
||||
return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
|
||||
}
|
||||
|
||||
private final int getTagIndex(List<String> tags){
|
||||
return tags.contains(reduced) ? 1 : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(GenomeLoc sum) {
|
||||
if ( sum != null )
|
||||
out.println(sum);
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
|
||||
if ( lhs == null )
|
||||
return rhs;
|
||||
|
||||
if ( rhs == null )
|
||||
return lhs;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( lhs.contiguousP(rhs) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
|
||||
|
||||
// otherwise, print the lhs and start over with the rhs
|
||||
out.println(lhs);
|
||||
return rhs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
|
||||
if ( value == null )
|
||||
return sum;
|
||||
|
||||
if ( sum == null )
|
||||
return value;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( sum.contiguousP(value) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
|
||||
|
||||
// otherwise, print the sum and start over with the value
|
||||
out.println(sum);
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
|
@ -25,9 +25,11 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.collections;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
|
@ -38,39 +40,50 @@ import java.util.List;
|
|||
|
||||
public class NestedIntegerArray<T> {
|
||||
|
||||
private static Logger logger = Logger.getLogger(NestedIntegerArray.class);
|
||||
|
||||
protected final Object[] data;
|
||||
|
||||
protected final int numDimensions;
|
||||
protected final int[] dimensions;
|
||||
|
||||
// Preallocate the first two dimensions to limit contention during tree traversals in put()
|
||||
private static final int NUM_DIMENSIONS_TO_PREALLOCATE = 2;
|
||||
|
||||
public NestedIntegerArray(final int... dimensions) {
|
||||
numDimensions = dimensions.length;
|
||||
if ( numDimensions == 0 )
|
||||
throw new ReviewedStingException("There must be at least one dimension to an NestedIntegerArray");
|
||||
this.dimensions = dimensions.clone();
|
||||
|
||||
int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE);
|
||||
|
||||
logger.info(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions)));
|
||||
logger.info(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate));
|
||||
|
||||
data = new Object[dimensions[0]];
|
||||
prepopulateArray(data, 0);
|
||||
preallocateArray(data, 0, dimensionsToPreallocate);
|
||||
|
||||
logger.info(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate));
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively allocate the entire tree of arrays in all its dimensions.
|
||||
* Recursively allocate the first dimensionsToPreallocate dimensions of the tree
|
||||
*
|
||||
* Doing this upfront uses more memory initially, but saves time over the course of the run
|
||||
* and (crucially) avoids having to make threads wait while traversing the tree to check
|
||||
* whether branches exist or not.
|
||||
* Pre-allocating the first few dimensions helps limit contention during tree traversals in put()
|
||||
*
|
||||
* @param subarray current node in the tree
|
||||
* @param dimension current level in the tree
|
||||
* @param dimensionsToPreallocate preallocate only this many dimensions (starting from the first)
|
||||
*/
|
||||
private void prepopulateArray( Object[] subarray, int dimension ) {
|
||||
if ( dimension >= numDimensions - 1 ) {
|
||||
private void preallocateArray( Object[] subarray, int dimension, int dimensionsToPreallocate ) {
|
||||
if ( dimension >= dimensionsToPreallocate - 1 ) {
|
||||
return;
|
||||
}
|
||||
|
||||
for ( int i = 0; i < subarray.length; i++ ) {
|
||||
subarray[i] = new Object[dimensions[dimension + 1]];
|
||||
prepopulateArray((Object[])subarray[i], dimension + 1);
|
||||
preallocateArray((Object[])subarray[i], dimension + 1, dimensionsToPreallocate);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -82,8 +95,9 @@ public class NestedIntegerArray<T> {
|
|||
if ( keys[i] >= dimensions[i] )
|
||||
return null;
|
||||
|
||||
myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse
|
||||
// down to the leaves
|
||||
myData = (Object[])myData[keys[i]];
|
||||
if ( myData == null )
|
||||
return null;
|
||||
}
|
||||
|
||||
return (T)myData[keys[numNestedDimensions]];
|
||||
|
|
@ -92,8 +106,8 @@ public class NestedIntegerArray<T> {
|
|||
/**
|
||||
* Insert a value at the position specified by the given keys.
|
||||
*
|
||||
* This method is THREAD-SAFE despite not being synchronized, however the caller MUST
|
||||
* check the return value to see if the put succeeded. This method RETURNS FALSE if
|
||||
* This method is thread-safe, however the caller MUST check the
|
||||
* return value to see if the put succeeded. This method RETURNS FALSE if
|
||||
* the value could not be inserted because there already was a value present
|
||||
* at the specified location. In this case the caller should do a get() to get
|
||||
* the already-existing value and (potentially) update it.
|
||||
|
|
@ -113,8 +127,17 @@ public class NestedIntegerArray<T> {
|
|||
if ( keys[i] >= dimensions[i] )
|
||||
throw new ReviewedStingException("Key " + keys[i] + " is too large for dimension " + i + " (max is " + (dimensions[i]-1) + ")");
|
||||
|
||||
myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse
|
||||
// down to the leaves
|
||||
// If we're at or beyond the last dimension that was pre-allocated, we need to do a synchronized
|
||||
// check to see if the next branch exists, and if it doesn't, create it
|
||||
if ( i >= NUM_DIMENSIONS_TO_PREALLOCATE - 1 ) {
|
||||
synchronized ( myData ) {
|
||||
if ( myData[keys[i]] == null ) {
|
||||
myData[keys[i]] = new Object[dimensions[i + 1]];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
myData = (Object[])myData[keys[i]];
|
||||
}
|
||||
|
||||
synchronized ( myData ) { // lock the bottom row while we examine and (potentially) update it
|
||||
|
|
|
|||
|
|
@ -116,9 +116,12 @@ public class BaseRecalibration {
|
|||
// Compute all covariates for the read
|
||||
// TODO -- the need to clear here suggests there's an error in the indexing / assumption code
|
||||
// TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads?
|
||||
// TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here
|
||||
// TODO -- needs to be fixed.
|
||||
final ReadCovariates readCovariates = readCovariatesCache.get().clear();
|
||||
// TODO -- the output varies depending on whether we clear or not
|
||||
//final ReadCovariates readCovariates = readCovariatesCache.get().clear();
|
||||
|
||||
// the original code -- doesn't do any clearing
|
||||
final ReadCovariates readCovariates = readCovariatesCache.get();
|
||||
|
||||
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
|
||||
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
|
|
|
|||
|
|
@ -136,7 +136,14 @@ class GATKResourcesBundle extends QScript {
|
|||
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
|
||||
"Mills_and_1000G_gold_standard.indels", b37, true, false))
|
||||
|
||||
|
||||
//
|
||||
// CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT)
|
||||
//
|
||||
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf",
|
||||
"CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false))
|
||||
|
||||
//
|
||||
// example call set for wiki tutorial
|
||||
//
|
||||
|
|
|
|||
Loading…
Reference in New Issue