Merge branch 'master' of ssh://gsa3/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-10-25 16:33:14 -04:00
commit 92fa7e953a
5 changed files with 235 additions and 25 deletions

View File

@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* @author ebanks
@ -118,20 +120,26 @@ public class BQSRIntegrationTest extends WalkerTest {
@DataProvider(name = "PRTest")
public Object[][] createPRTestData() {
return new Object[][]{
{new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")},
{new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")},
{new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")},
{new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")}
};
List<Object[]> tests = new ArrayList<Object[]>();
tests.add(new Object[]{1, new PRTest(" -qq -1", "a1d87da5dcbde35170d6ba6bc3ee2812")});
tests.add(new Object[]{1, new PRTest(" -qq 6", "a0fecae6d0e5ab9917862fa306186d10")});
tests.add(new Object[]{1, new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")});
for ( final int nct : Arrays.asList(1, 2, 4) ) {
tests.add(new Object[]{nct, new PRTest("", "d1bbb4ce6aa93e866f106f8b11d888ed")});
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "PRTest")
public void testPR(PRTest params) {
public void testPR(final int nct, PRTest params) {
WalkerTestSpec spec = new WalkerTestSpec(
"-T PrintReads" +
" -R " + hg18Reference +
" -I " + privateTestDir + "HiSeq.1mb.1RG.bam" +
" -nct " + nct +
" -BQSR " + privateTestDir + "HiSeq.20mb.1RG.table" +
params.args +
" -o %s",

View File

@ -0,0 +1,169 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream;
import java.util.List;
/**
* Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of
* the reduced bam are above sufficient threshold)
*
* <h2>Input</h2>
* <p>
* The original and reduced BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -I:original original.bam \
* -I:reduced reduced.bam \
* -R ref.fasta \
* -T AssessReducedQuals \
* -o output.intervals
* </pre>
*
* @author ami
*/
public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
private static final String reduced = "reduced";
private static final String original = "original";
private static final int originalQualsIndex = 0;
private static final int reducedQualsIndex = 1;
@Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
public int sufficientQualSum = 600;
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
public int qual_epsilon = 0;
@Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on")
public int debugLevel = 0;
@Output
protected PrintStream out;
public void initialize() {
if (debugLevel != 0)
out.println(" Debug mode" +
"Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
"Debug:\tqual_epsilon: "+qual_epsilon);
}
@Override
public boolean includeReadsWithDeletionAtLoci() { return true; }
@Override
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null )
return null;
boolean reportLocus;
final int[] quals = getPileupQuals(context.getBasePileup());
if (debugLevel != 0)
out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
if(debugLevel != 0 && reportLocus)
out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
return reportLocus ? ref.getLocus() : null;
}
private final int[] getPileupQuals(final ReadBackedPileup readPileup) {
final int[] quals = new int[2];
String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
"Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
for( PileupElement p : readPileup ){
final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
if ( isGoodRead(p,tags) ){
final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
final int tagIndex = getTagIndex(tags);
quals[tagIndex] += tempQual;
if(debugLevel == 2)
printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
}
}
if(debugLevel == 2){
out.println(printPileup[originalQualsIndex]);
out.println(printPileup[reducedQualsIndex]);
}
return quals;
}
private final boolean isGoodRead(PileupElement p, List<String> tags){
return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
}
private final int getTagIndex(List<String> tags){
return tags.contains(reduced) ? 1 : 0;
}
@Override
public void onTraversalDone(GenomeLoc sum) {
if ( sum != null )
out.println(sum);
}
@Override
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
if ( lhs == null )
return rhs;
if ( rhs == null )
return lhs;
// if contiguous, just merge them
if ( lhs.contiguousP(rhs) )
return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
// otherwise, print the lhs and start over with the rhs
out.println(lhs);
return rhs;
}
@Override
public GenomeLoc reduceInit() {
return null;
}
@Override
public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
if ( value == null )
return sum;
if ( sum == null )
return value;
// if contiguous, just merge them
if ( sum.contiguousP(value) )
return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
// otherwise, print the sum and start over with the value
out.println(sum);
return value;
}
}

View File

@ -25,9 +25,11 @@
package org.broadinstitute.sting.utils.collections;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
@ -38,39 +40,50 @@ import java.util.List;
public class NestedIntegerArray<T> {
private static Logger logger = Logger.getLogger(NestedIntegerArray.class);
protected final Object[] data;
protected final int numDimensions;
protected final int[] dimensions;
// Preallocate the first two dimensions to limit contention during tree traversals in put()
private static final int NUM_DIMENSIONS_TO_PREALLOCATE = 2;
public NestedIntegerArray(final int... dimensions) {
numDimensions = dimensions.length;
if ( numDimensions == 0 )
throw new ReviewedStingException("There must be at least one dimension to an NestedIntegerArray");
this.dimensions = dimensions.clone();
int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE);
logger.info(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions)));
logger.info(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate));
data = new Object[dimensions[0]];
prepopulateArray(data, 0);
preallocateArray(data, 0, dimensionsToPreallocate);
logger.info(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate));
}
/**
* Recursively allocate the entire tree of arrays in all its dimensions.
* Recursively allocate the first dimensionsToPreallocate dimensions of the tree
*
* Doing this upfront uses more memory initially, but saves time over the course of the run
* and (crucially) avoids having to make threads wait while traversing the tree to check
* whether branches exist or not.
* Pre-allocating the first few dimensions helps limit contention during tree traversals in put()
*
* @param subarray current node in the tree
* @param dimension current level in the tree
* @param dimensionsToPreallocate preallocate only this many dimensions (starting from the first)
*/
private void prepopulateArray( Object[] subarray, int dimension ) {
if ( dimension >= numDimensions - 1 ) {
private void preallocateArray( Object[] subarray, int dimension, int dimensionsToPreallocate ) {
if ( dimension >= dimensionsToPreallocate - 1 ) {
return;
}
for ( int i = 0; i < subarray.length; i++ ) {
subarray[i] = new Object[dimensions[dimension + 1]];
prepopulateArray((Object[])subarray[i], dimension + 1);
preallocateArray((Object[])subarray[i], dimension + 1, dimensionsToPreallocate);
}
}
@ -82,8 +95,9 @@ public class NestedIntegerArray<T> {
if ( keys[i] >= dimensions[i] )
return null;
myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse
// down to the leaves
myData = (Object[])myData[keys[i]];
if ( myData == null )
return null;
}
return (T)myData[keys[numNestedDimensions]];
@ -92,8 +106,8 @@ public class NestedIntegerArray<T> {
/**
* Insert a value at the position specified by the given keys.
*
* This method is THREAD-SAFE despite not being synchronized, however the caller MUST
* check the return value to see if the put succeeded. This method RETURNS FALSE if
* This method is thread-safe, however the caller MUST check the
* return value to see if the put succeeded. This method RETURNS FALSE if
* the value could not be inserted because there already was a value present
* at the specified location. In this case the caller should do a get() to get
* the already-existing value and (potentially) update it.
@ -113,8 +127,17 @@ public class NestedIntegerArray<T> {
if ( keys[i] >= dimensions[i] )
throw new ReviewedStingException("Key " + keys[i] + " is too large for dimension " + i + " (max is " + (dimensions[i]-1) + ")");
myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse
// down to the leaves
// If we're at or beyond the last dimension that was pre-allocated, we need to do a synchronized
// check to see if the next branch exists, and if it doesn't, create it
if ( i >= NUM_DIMENSIONS_TO_PREALLOCATE - 1 ) {
synchronized ( myData ) {
if ( myData[keys[i]] == null ) {
myData[keys[i]] = new Object[dimensions[i + 1]];
}
}
}
myData = (Object[])myData[keys[i]];
}
synchronized ( myData ) { // lock the bottom row while we examine and (potentially) update it

View File

@ -116,9 +116,12 @@ public class BaseRecalibration {
// Compute all covariates for the read
// TODO -- the need to clear here suggests there's an error in the indexing / assumption code
// TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads?
// TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here
// TODO -- needs to be fixed.
final ReadCovariates readCovariates = readCovariatesCache.get().clear();
// TODO -- the output varies depending on whether we clear or not
//final ReadCovariates readCovariates = readCovariatesCache.get().clear();
// the original code -- doesn't do any clearing
final ReadCovariates readCovariates = readCovariatesCache.get();
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings

View File

@ -136,7 +136,14 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
"Mills_and_1000G_gold_standard.indels", b37, true, false))
//
// CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT)
//
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf",
"CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false))
//
// example call set for wiki tutorial
//