Merge branch 'master' of ssh://gsa3/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
92fa7e953a
|
|
@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author ebanks
|
* @author ebanks
|
||||||
|
|
@ -118,20 +120,26 @@ public class BQSRIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@DataProvider(name = "PRTest")
|
@DataProvider(name = "PRTest")
|
||||||
public Object[][] createPRTestData() {
|
public Object[][] createPRTestData() {
|
||||||
return new Object[][]{
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
{new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")},
|
|
||||||
{new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")},
|
tests.add(new Object[]{1, new PRTest(" -qq -1", "a1d87da5dcbde35170d6ba6bc3ee2812")});
|
||||||
{new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")},
|
tests.add(new Object[]{1, new PRTest(" -qq 6", "a0fecae6d0e5ab9917862fa306186d10")});
|
||||||
{new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")}
|
tests.add(new Object[]{1, new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")});
|
||||||
};
|
|
||||||
|
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
||||||
|
tests.add(new Object[]{nct, new PRTest("", "d1bbb4ce6aa93e866f106f8b11d888ed")});
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "PRTest")
|
@Test(dataProvider = "PRTest")
|
||||||
public void testPR(PRTest params) {
|
public void testPR(final int nct, PRTest params) {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T PrintReads" +
|
"-T PrintReads" +
|
||||||
" -R " + hg18Reference +
|
" -R " + hg18Reference +
|
||||||
" -I " + privateTestDir + "HiSeq.1mb.1RG.bam" +
|
" -I " + privateTestDir + "HiSeq.1mb.1RG.bam" +
|
||||||
|
" -nct " + nct +
|
||||||
" -BQSR " + privateTestDir + "HiSeq.20mb.1RG.table" +
|
" -BQSR " + privateTestDir + "HiSeq.20mb.1RG.table" +
|
||||||
params.args +
|
params.args +
|
||||||
" -o %s",
|
" -o %s",
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,169 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of
|
||||||
|
* the reduced bam are above sufficient threshold)
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* The original and reduced BAM files.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -I:original original.bam \
|
||||||
|
* -I:reduced reduced.bam \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T AssessReducedQuals \
|
||||||
|
* -o output.intervals
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author ami
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
|
||||||
|
|
||||||
|
private static final String reduced = "reduced";
|
||||||
|
private static final String original = "original";
|
||||||
|
private static final int originalQualsIndex = 0;
|
||||||
|
private static final int reducedQualsIndex = 1;
|
||||||
|
|
||||||
|
@Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
|
||||||
|
public int sufficientQualSum = 600;
|
||||||
|
|
||||||
|
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
|
||||||
|
public int qual_epsilon = 0;
|
||||||
|
|
||||||
|
@Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on")
|
||||||
|
public int debugLevel = 0;
|
||||||
|
|
||||||
|
@Output
|
||||||
|
protected PrintStream out;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
if (debugLevel != 0)
|
||||||
|
out.println(" Debug mode" +
|
||||||
|
"Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
|
||||||
|
"Debug:\tqual_epsilon: "+qual_epsilon);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if ( tracker == null )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
boolean reportLocus;
|
||||||
|
final int[] quals = getPileupQuals(context.getBasePileup());
|
||||||
|
if (debugLevel != 0)
|
||||||
|
out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
|
||||||
|
final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
|
||||||
|
final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
|
||||||
|
final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
|
||||||
|
final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
|
||||||
|
reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
|
||||||
|
if(debugLevel != 0 && reportLocus)
|
||||||
|
out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
|
||||||
|
|
||||||
|
return reportLocus ? ref.getLocus() : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final int[] getPileupQuals(final ReadBackedPileup readPileup) {
|
||||||
|
|
||||||
|
final int[] quals = new int[2];
|
||||||
|
String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
|
||||||
|
"Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
|
||||||
|
|
||||||
|
for( PileupElement p : readPileup ){
|
||||||
|
final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
|
||||||
|
if ( isGoodRead(p,tags) ){
|
||||||
|
final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
|
||||||
|
final int tagIndex = getTagIndex(tags);
|
||||||
|
quals[tagIndex] += tempQual;
|
||||||
|
if(debugLevel == 2)
|
||||||
|
printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(debugLevel == 2){
|
||||||
|
out.println(printPileup[originalQualsIndex]);
|
||||||
|
out.println(printPileup[reducedQualsIndex]);
|
||||||
|
}
|
||||||
|
return quals;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final boolean isGoodRead(PileupElement p, List<String> tags){
|
||||||
|
return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
|
||||||
|
}
|
||||||
|
|
||||||
|
private final int getTagIndex(List<String> tags){
|
||||||
|
return tags.contains(reduced) ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(GenomeLoc sum) {
|
||||||
|
if ( sum != null )
|
||||||
|
out.println(sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
|
||||||
|
if ( lhs == null )
|
||||||
|
return rhs;
|
||||||
|
|
||||||
|
if ( rhs == null )
|
||||||
|
return lhs;
|
||||||
|
|
||||||
|
// if contiguous, just merge them
|
||||||
|
if ( lhs.contiguousP(rhs) )
|
||||||
|
return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
|
||||||
|
|
||||||
|
// otherwise, print the lhs and start over with the rhs
|
||||||
|
out.println(lhs);
|
||||||
|
return rhs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public GenomeLoc reduceInit() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
|
||||||
|
if ( value == null )
|
||||||
|
return sum;
|
||||||
|
|
||||||
|
if ( sum == null )
|
||||||
|
return value;
|
||||||
|
|
||||||
|
// if contiguous, just merge them
|
||||||
|
if ( sum.contiguousP(value) )
|
||||||
|
return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
|
||||||
|
|
||||||
|
// otherwise, print the sum and start over with the value
|
||||||
|
out.println(sum);
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -25,9 +25,11 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.collections;
|
package org.broadinstitute.sting.utils.collections;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -38,39 +40,50 @@ import java.util.List;
|
||||||
|
|
||||||
public class NestedIntegerArray<T> {
|
public class NestedIntegerArray<T> {
|
||||||
|
|
||||||
|
private static Logger logger = Logger.getLogger(NestedIntegerArray.class);
|
||||||
|
|
||||||
protected final Object[] data;
|
protected final Object[] data;
|
||||||
|
|
||||||
protected final int numDimensions;
|
protected final int numDimensions;
|
||||||
protected final int[] dimensions;
|
protected final int[] dimensions;
|
||||||
|
|
||||||
|
// Preallocate the first two dimensions to limit contention during tree traversals in put()
|
||||||
|
private static final int NUM_DIMENSIONS_TO_PREALLOCATE = 2;
|
||||||
|
|
||||||
public NestedIntegerArray(final int... dimensions) {
|
public NestedIntegerArray(final int... dimensions) {
|
||||||
numDimensions = dimensions.length;
|
numDimensions = dimensions.length;
|
||||||
if ( numDimensions == 0 )
|
if ( numDimensions == 0 )
|
||||||
throw new ReviewedStingException("There must be at least one dimension to an NestedIntegerArray");
|
throw new ReviewedStingException("There must be at least one dimension to an NestedIntegerArray");
|
||||||
this.dimensions = dimensions.clone();
|
this.dimensions = dimensions.clone();
|
||||||
|
|
||||||
|
int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE);
|
||||||
|
|
||||||
|
logger.info(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions)));
|
||||||
|
logger.info(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate));
|
||||||
|
|
||||||
data = new Object[dimensions[0]];
|
data = new Object[dimensions[0]];
|
||||||
prepopulateArray(data, 0);
|
preallocateArray(data, 0, dimensionsToPreallocate);
|
||||||
|
|
||||||
|
logger.info(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Recursively allocate the entire tree of arrays in all its dimensions.
|
* Recursively allocate the first dimensionsToPreallocate dimensions of the tree
|
||||||
*
|
*
|
||||||
* Doing this upfront uses more memory initially, but saves time over the course of the run
|
* Pre-allocating the first few dimensions helps limit contention during tree traversals in put()
|
||||||
* and (crucially) avoids having to make threads wait while traversing the tree to check
|
|
||||||
* whether branches exist or not.
|
|
||||||
*
|
*
|
||||||
* @param subarray current node in the tree
|
* @param subarray current node in the tree
|
||||||
* @param dimension current level in the tree
|
* @param dimension current level in the tree
|
||||||
|
* @param dimensionsToPreallocate preallocate only this many dimensions (starting from the first)
|
||||||
*/
|
*/
|
||||||
private void prepopulateArray( Object[] subarray, int dimension ) {
|
private void preallocateArray( Object[] subarray, int dimension, int dimensionsToPreallocate ) {
|
||||||
if ( dimension >= numDimensions - 1 ) {
|
if ( dimension >= dimensionsToPreallocate - 1 ) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( int i = 0; i < subarray.length; i++ ) {
|
for ( int i = 0; i < subarray.length; i++ ) {
|
||||||
subarray[i] = new Object[dimensions[dimension + 1]];
|
subarray[i] = new Object[dimensions[dimension + 1]];
|
||||||
prepopulateArray((Object[])subarray[i], dimension + 1);
|
preallocateArray((Object[])subarray[i], dimension + 1, dimensionsToPreallocate);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -82,8 +95,9 @@ public class NestedIntegerArray<T> {
|
||||||
if ( keys[i] >= dimensions[i] )
|
if ( keys[i] >= dimensions[i] )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse
|
myData = (Object[])myData[keys[i]];
|
||||||
// down to the leaves
|
if ( myData == null )
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (T)myData[keys[numNestedDimensions]];
|
return (T)myData[keys[numNestedDimensions]];
|
||||||
|
|
@ -92,8 +106,8 @@ public class NestedIntegerArray<T> {
|
||||||
/**
|
/**
|
||||||
* Insert a value at the position specified by the given keys.
|
* Insert a value at the position specified by the given keys.
|
||||||
*
|
*
|
||||||
* This method is THREAD-SAFE despite not being synchronized, however the caller MUST
|
* This method is thread-safe, however the caller MUST check the
|
||||||
* check the return value to see if the put succeeded. This method RETURNS FALSE if
|
* return value to see if the put succeeded. This method RETURNS FALSE if
|
||||||
* the value could not be inserted because there already was a value present
|
* the value could not be inserted because there already was a value present
|
||||||
* at the specified location. In this case the caller should do a get() to get
|
* at the specified location. In this case the caller should do a get() to get
|
||||||
* the already-existing value and (potentially) update it.
|
* the already-existing value and (potentially) update it.
|
||||||
|
|
@ -113,8 +127,17 @@ public class NestedIntegerArray<T> {
|
||||||
if ( keys[i] >= dimensions[i] )
|
if ( keys[i] >= dimensions[i] )
|
||||||
throw new ReviewedStingException("Key " + keys[i] + " is too large for dimension " + i + " (max is " + (dimensions[i]-1) + ")");
|
throw new ReviewedStingException("Key " + keys[i] + " is too large for dimension " + i + " (max is " + (dimensions[i]-1) + ")");
|
||||||
|
|
||||||
myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse
|
// If we're at or beyond the last dimension that was pre-allocated, we need to do a synchronized
|
||||||
// down to the leaves
|
// check to see if the next branch exists, and if it doesn't, create it
|
||||||
|
if ( i >= NUM_DIMENSIONS_TO_PREALLOCATE - 1 ) {
|
||||||
|
synchronized ( myData ) {
|
||||||
|
if ( myData[keys[i]] == null ) {
|
||||||
|
myData[keys[i]] = new Object[dimensions[i + 1]];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
myData = (Object[])myData[keys[i]];
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized ( myData ) { // lock the bottom row while we examine and (potentially) update it
|
synchronized ( myData ) { // lock the bottom row while we examine and (potentially) update it
|
||||||
|
|
|
||||||
|
|
@ -116,9 +116,12 @@ public class BaseRecalibration {
|
||||||
// Compute all covariates for the read
|
// Compute all covariates for the read
|
||||||
// TODO -- the need to clear here suggests there's an error in the indexing / assumption code
|
// TODO -- the need to clear here suggests there's an error in the indexing / assumption code
|
||||||
// TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads?
|
// TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads?
|
||||||
// TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here
|
// TODO -- the output varies depending on whether we clear or not
|
||||||
// TODO -- needs to be fixed.
|
//final ReadCovariates readCovariates = readCovariatesCache.get().clear();
|
||||||
final ReadCovariates readCovariates = readCovariatesCache.get().clear();
|
|
||||||
|
// the original code -- doesn't do any clearing
|
||||||
|
final ReadCovariates readCovariates = readCovariatesCache.get();
|
||||||
|
|
||||||
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
|
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
|
||||||
|
|
||||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||||
|
|
|
||||||
|
|
@ -137,6 +137,13 @@ class GATKResourcesBundle extends QScript {
|
||||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
|
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
|
||||||
"Mills_and_1000G_gold_standard.indels", b37, true, false))
|
"Mills_and_1000G_gold_standard.indels", b37, true, false))
|
||||||
|
|
||||||
|
//
|
||||||
|
// CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT)
|
||||||
|
//
|
||||||
|
|
||||||
|
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf",
|
||||||
|
"CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false))
|
||||||
|
|
||||||
//
|
//
|
||||||
// example call set for wiki tutorial
|
// example call set for wiki tutorial
|
||||||
//
|
//
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue