diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 9878dbfc0..4c233ffdc 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -22,9 +22,12 @@ public class CovariateCounterWalker extends LocusWalker { @Argument(fullName="buggyMaxReadLen", doc="If we see a read longer than this, we assume there's a bug and abort", required=false) public int buggyMaxReadLen = 100000; - @Argument(fullName="OUTPUT_FILEROOT", shortName="outroot", required=false, doc="Filename root for the outputted logistic regression training files") - public String OUTPUT_FILEROOT = "output"; + @Argument(fullName="OUTPUT_FILEROOT", shortName="outroot", required=false, doc="Depreciated output file root -- now use --params to directly specify the file output name") + public String OUTPUT_FILEROOT = null; // going to blow up if specified + @Argument(fullName="params", shortName="params", required=false, doc="Filename root for the outputted logistic regression training files") + public String params = "output.recal_data.csv"; + @Argument(fullName="MIN_MAPPING_QUALITY", shortName="minmap", required=false, doc="Only use reads with at least this quality score") public int MIN_MAPPING_QUALITY = 1; @@ -205,19 +208,19 @@ public class CovariateCounterWalker extends LocusWalker { */ public PrintStream reduceInit() { try { - return new PrintStream( OUTPUT_FILEROOT+".recal_data.csv" ); + if ( OUTPUT_FILEROOT != null ) + throw new RuntimeException("OUTPUT_FILEROOT argument has been removed, please use --params from now on to directly specify the output parameter filename"); + return new PrintStream( params ); } catch ( FileNotFoundException e ) { throw new RuntimeException("Couldn't open output file", e); } } public void onTraversalDone(PrintStream recalTableStream) { - printInfo(out); - - out.printf("Writing raw recalibration data..."); out.flush(); + out.printf("Writing raw recalibration data..."); writeRecalTable(recalTableStream); out.printf("...done%n"); - + //out.printf("Writing logistic recalibration data%n"); //writeLogisticRecalibrationTable(); //out.printf("...done%n"); @@ -230,7 +233,7 @@ public class CovariateCounterWalker extends LocusWalker { * @param out */ private void printInfo(PrintStream out) { - out.printf("# date \"%s\"%n", new Date()); + //out.printf("# date \"%s\"%n", new Date()); out.printf("# collapsed_pos %b%n", collapsePos); out.printf("# collapsed_dinuc %b%n", collapseDinuc); out.printf("# counted_sites %d%n", counted_sites); @@ -249,8 +252,9 @@ public class CovariateCounterWalker extends LocusWalker { recalTableStream.println("rg,pos,Qrep,dn,nBases,nMismatches,Qemp"); for (String readGroup : new TreeSet(covariateCounter.getReadGroups()) ) { for ( RecalData datum: RecalData.sort(covariateCounter.getRecalData(readGroup)) ) { - if ( datum.N > 0 ) + if ( datum.N > 0 ) { recalTableStream.println(datum.toCSVString(collapsePos)); + } } } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 53972aa0a..e4566c790 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -42,13 +42,13 @@ import java.io.FileNotFoundException; import java.lang.reflect.Method; @WalkerName("TableRecalibration") -@Requires({DataSource.READS, DataSource.REFERENCE}) +@Requires({DataSource.READS}) // , DataSource.REFERENCE}) public class TableRecalibrationWalker extends ReadWalker { - @Argument(shortName="params", doc="CountCovariates params file", required=true) + @Argument(fullName="params", shortName="params", doc="CountCovariates params file", required=true) public String paramsFile; - @Argument(fullName="outputBamFile", shortName="outputBAM", doc="output BAM file", required=false) - public SAMFileWriter outputBamFile = null; + @Argument(fullName="outputBam", shortName="outputBam", doc="output BAM file", required=true) + public SAMFileWriter outputBam = null; @Argument(shortName="rawQempirical", doc="If provided, we will use raw Qempirical scores calculated from the # mismatches and # bases, rather than the more conservative estimate of # mismatches + 1 / # bases + 1", required=false) public boolean rawQempirical = false; @@ -63,7 +63,7 @@ public class TableRecalibrationWalker extends ReadWalker [prevBase x base -> [cycle, qual, new qual]] HashMap cache = new HashMap(); @@ -271,7 +271,7 @@ public class TableRecalibrationWalker extends ReadWalker executeTest(final String name, WalkerTestSpec spec) { + protected Pair, List> executeTest(final String name, WalkerTestSpec spec) { List tmpFiles = new ArrayList(); for ( int i = 0; i < spec.nOutputFiles; i++ ) { try { @@ -110,14 +112,19 @@ public class WalkerTest extends BaseTest { final String args = String.format(spec.args, tmpFiles.toArray()); logger.warn(Utils.dupString('-', 80)); logger.warn(String.format("Executing test %s with GATK arguments: %s", name, args)); + CommandLineGATK instance = new CommandLineGATK(); CommandLineExecutable.start(instance, args.split(" ")); - return assertMatchingMD5s(name, tmpFiles, spec.md5s); + if ( CommandLineExecutable.result != 0 ) { + throw new RuntimeException("Error running the GATK with arguments: " + args); + } + + return new Pair, List>(tmpFiles, assertMatchingMD5s(name, tmpFiles, spec.md5s)); } @Test public void testWalkerTest() { - logger.warn("WalkerTest is just a framework"); + //logger.warn("WalkerTest is just a framework"); } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyperTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyperTest.java index 06c912f32..f5ca640c2 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyperTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyperTest.java @@ -44,6 +44,26 @@ public class SingleSampleGenotyperTest extends WalkerTest { // return true; //} + // -------------------------------------------------------------------------------------------------------------- + // + // testing calls with SLX, 454, and SOLID data + // + // -------------------------------------------------------------------------------------------------------------- + @Test + public void testMultiTechnologies() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T SingleSampleGenotyper" + + " -R /broad/1KG/reference/human_b36_both.fasta" + + " -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" + + " -varout %s" + + " -L 1:10,000,000-10,100,000" + + " -m empirical", + 1, + Arrays.asList("b8975b303952edff3b0273165ba91001")); + + executeTest(String.format("testMultiTechnologies"), spec); + } + // -------------------------------------------------------------------------------------------------------------- // // testing the cache @@ -56,7 +76,7 @@ public class SingleSampleGenotyperTest extends WalkerTest { WalkerTest.WalkerTestSpec withoutCacheSpec = new WalkerTest.WalkerTestSpec( testGeliLod5() + " -L 1:10,000,000-10,100,000 --disableCache -m " + model.toString(), 1, Arrays.asList("")); - List withoutCache = executeTest("empirical1MbTest", withoutCacheSpec ); + List withoutCache = executeTest("empirical1MbTest", withoutCacheSpec ).getSecond(); WalkerTest.WalkerTestSpec withCacheSpec = new WalkerTest.WalkerTestSpec( testGeliLod5() + " -L 1:10,000,000-10,100,000 -m " + model.toString(), 1, @@ -114,6 +134,8 @@ public class SingleSampleGenotyperTest extends WalkerTest { executeTest("empirical1MbTest", spec); } + + // -------------------------------------------------------------------------------------------------------------- // // testing output formats diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersTest.java new file mode 100755 index 000000000..672c79b29 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersTest.java @@ -0,0 +1,66 @@ +package org.broadinstitute.sting.gatk.walkers.recalibration; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; +import java.util.Arrays; +import java.util.List; +import java.io.File; + +public class RecalibrationWalkersTest extends WalkerTest { + static HashMap paramsFiles = new HashMap(); + + @Test + public void testCountCovariates1() { + HashMap e = new HashMap(); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.SLX.SRP000031.2009_06.chr1.10_20mb.bam", "47664c48992f593258932583576b47e4" ); + //e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b58185dc5fbdd88ca9539d940dff6c1a" ); + + for ( Map.Entry entry : e.entrySet() ) { + String bam = entry.getKey(); + String md5 = entry.getValue(); + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R /broad/1KG/reference/human_b36_both.fasta" + + " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + + " -T CountCovariates" + + " -I " + bam + + " -L 1:10,000,000-11,000,000" + + " --params %s", + 1, // just one output file + Arrays.asList(md5)); + List result = executeTest("testCountCovariates1", spec).getFirst(); + paramsFiles.put(bam, result.get(0).getAbsolutePath()); + } + } + + @Test + public void testTableRecalibrator1() { + HashMap e = new HashMap(); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.SLX.SRP000031.2009_06.chr1.10_20mb.bam", "c98525aca6493179f084159df0264782" ); + //e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "" ); + //e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "28c5026359a952a5c652b1ccc2acac04" ); + + for ( Map.Entry entry : e.entrySet() ) { + String bam = entry.getKey(); + String md5 = entry.getValue(); + String paramsFile = paramsFiles.get(bam); + System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); + + WalkerTestSpec spec = new WalkerTestSpec( + "-R /broad/1KG/reference/human_b36_both.fasta" + + " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + + " -T TableRecalibration" + + " -I " + bam + + " -L 1:10,000,000-20,000,000" + + " --outputBam %s" + + " --params " + paramsFile, + 1, // just one output file + Arrays.asList(md5)); + executeTest("testTableRecalibrator1", spec); + } + } +} \ No newline at end of file diff --git a/python/1kgScripts/runme_freeze5.1.csh b/python/1kgScripts/runme_freeze5.1.csh new file mode 100755 index 000000000..ba401c18c --- /dev/null +++ b/python/1kgScripts/runme_freeze5.1.csh @@ -0,0 +1,3 @@ +python ~/dev/GenomeAnalysisTK/trunk/python/MergeBAMBatch.py -d freeze5.1 -q gsa lists/low_coverage_freeze5.list -n lists/naids_and_pop.txt +python ~/dev/GenomeAnalysisTK/trunk/python/MergeBAMBatch.py -d freeze5.1 -q gsa -s lists/trios_freeze5.list +