From 3e2a0b15ed1fec4915abc15896425b0cf77c5600 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Mon, 13 May 2013 17:19:40 -0400 Subject: [PATCH] - Added a @Hidden option ( -outputInsertLength ) to PileupWalker that causes it to emit insert sizes together with the pileup (to assist Mark Daly's investigation of the contamination dependance on insert length) - Converted my old GATKBAMIndexText (within PileupWalkerIntegrationTest) to use a dataProvider - Added two integration tests to test -outputInsertLength option --- .../sting/gatk/walkers/qc/Pileup.java | 23 +++++-- .../qc/PileupWalkerIntegrationTest.java | 60 +++++++++++++------ 2 files changed, 62 insertions(+), 21 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java index bc98c670a..23bbf1460 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/Pileup.java @@ -26,10 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.qc; import org.broad.tribble.Feature; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -96,6 +93,10 @@ public class Pileup extends LocusWalker implements TreeReducibl @Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false) public List> rods = Collections.emptyList(); + @Hidden + @Argument(fullName="outputInsertLength",shortName = "outputInsertLength",doc="Add a column which contains the length of the insert each base comes from.",required=false) + public boolean outputInsertLength=false; + @Override public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { final String rods = getReferenceOrderedData( tracker ); @@ -104,6 +105,8 @@ public class Pileup extends LocusWalker implements TreeReducibl final StringBuilder s = new StringBuilder(); s.append(String.format("%s %s", basePileup.getPileupString((char)ref.getBase()), rods)); + if ( outputInsertLength ) + s.append(" ").append(insertLengthOutput(basePileup)); if ( SHOW_VERBOSE ) s.append(" ").append(createVerboseOutput(basePileup)); s.append("\n"); @@ -143,6 +146,18 @@ public class Pileup extends LocusWalker implements TreeReducibl return rodString; } + private static String insertLengthOutput(final ReadBackedPileup pileup) { + + Integer[] insertSizes=new Integer[pileup.depthOfCoverage()]; + + int i=0; + for ( PileupElement p : pileup ) { + insertSizes[i]=p.getRead().getInferredInsertSize(); + ++i; + } + return Utils.join(",",insertSizes); + } + private static String createVerboseOutput(final ReadBackedPileup pileup) { final StringBuilder sb = new StringBuilder(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java index 76654fb74..6141a484c 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/PileupWalkerIntegrationTest.java @@ -26,13 +26,14 @@ package org.broadinstitute.sting.gatk.walkers.qc; import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; public class PileupWalkerIntegrationTest extends WalkerTest { - String gatkSpeedupArgs="-T Pileup -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam " - + "-R " + hg19Reference + " -o %s "; @Test public void testGnarleyFHSPileup() { @@ -73,25 +74,50 @@ public class PileupWalkerIntegrationTest extends WalkerTest { //testing speedup to GATKBAMIndex - @Test - public void testPileupOnLargeBamChr20(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050", 1, Arrays.asList("8702701350de11a6d28204acefdc4775")); - executeTest("Testing single on big BAM at start of chromosome 20", spec); + @DataProvider(name="GATKBAMIndexTest") + public Object[][] makeMyDataProvider() { + List tests = new ArrayList(); + tests.add(new Object[]{"-L 20:1-76,050","8702701350de11a6d28204acefdc4775"}); + tests.add(new Object[]{"-L 20:10,000,000-10,001,100","818cf5a8229efe6f89fc1cd8145ccbe3"}); + tests.add(new Object[]{"-L 20:62,954,114-63,025,520","22471ea4a12e5139aef62bf8ff2a5b63"}); + tests.add(new Object[]{"-L 20:1-76,050 -L 20:20,000,000-20,000,100 -L 20:40,000,000-40,000,100 -L 20:30,000,000-30,000,100 -L 20:50,000,000-50,000,100 -L 20:62,954,114-63,025,520 ","08d899ed7c5a76ef3947bf67338acda1"}); + return tests.toArray(new Object[][]{}); } - @Test - public void testPileupOnLargeBamMid20(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:10,000,000-10,001,100", 1, Arrays.asList("818cf5a8229efe6f89fc1cd8145ccbe3")); - executeTest("Testing single on big BAM somewhere in chromosome 20", spec); + + @Test(dataProvider = "GATKBAMIndexTest") + public void testGATKBAMIndexSpeedup(final String intervals, final String md5){ + final String gatkArgs="-T Pileup -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam " + + "-R " + hg19Reference + " -o %s "; + + WalkerTestSpec spec = new WalkerTestSpec(gatkArgs + intervals, 1, Arrays.asList(md5)); + executeTest("Testing with intervals="+intervals, spec); } + + + /***********************/ + + // testing hidden option -outputInsertLength + private final static String SingleReadAligningOffChromosome1withInsertLengthMD5 = "279e2ec8832e540f47a6e2bdf4cef5ea"; @Test - public void testPileupOnLargeBamEnd20(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:62,954,114-63,025,520", 1, Arrays.asList("22471ea4a12e5139aef62bf8ff2a5b63")); - executeTest("Testing single at end of chromosome 20", spec); + public void testSingleReadAligningOffChromosome1withInsertLength() { + String gatk_args = "-T Pileup " + + " -I " + privateTestDir + "readOffb37contig1.bam" + + " -R " + b37KGReference + + " -outputInsertLength" + + " -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1withInsertLengthMD5)); + executeTest("Testing single read spanning off chromosome 1 (with insert length)", spec); } + @Test - public void testPileupOnLargeBam20Many(){ - WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050 -L 20:20,000,000-20,000,100 -L 20:40,000,000-40,000,100 -L 20:30,000,000-30,000,100 -L 20:50,000,000-50,000,100 -L 20:62,954,114-63,025,520 ", - 1, Arrays.asList("08d899ed7c5a76ef3947bf67338acda1")); - executeTest("Testing single on big BAM many places", spec); + public void testGnarleyFHSPileupwithInsertLength() { + String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam " + + "-R " + hg18Reference + + " -outputInsertLength" + + " -L chr15:46,347,148 -o %s"; + String expected_md5 = "53ced173768f3d4d90b8a8206e72eae5"; + WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(expected_md5)); + executeTest("Testing the standard (no-indel) pileup on three merged FHS pools with 27 deletions in 969 bases (with insert length)", spec); } + }