Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ami Levy Moonshine 2012-09-17 09:58:27 -04:00
commit ccc3f4ff8d
17 changed files with 148 additions and 122 deletions

View File

@ -195,8 +195,8 @@ public class ErrorModel {
if (eventLength < 0 && pileupElement.isBeforeDeletionStart() && pileupElement.getEventLength() == -eventLength)
return true;
if (eventLength > 0 && pileupElement.isBeforeInsertion() &&
Arrays.equals(pileupElement.getEventBases().getBytes(),alleleBases))
if (eventLength > 0 && pileupElement.isBeforeInsertion() &&
Arrays.equals(pileupElement.getEventBases().getBytes(),Arrays.copyOfRange(alleleBases,1,alleleBases.length))) // allele contains ref byte, but pileupElement's event bases doesn't
return true;
return false;

View File

@ -199,6 +199,8 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
numAlleles, log10AlleleFrequencyPriors, result);
combinedPloidy = ploidyPerPool + combinedPloidy; // total number of chromosomes in combinedLikelihoods
}
int k=0;
}
public static CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool, double[] newGL, int originalPloidy, int newGLPloidy, int numAlleles,
@ -408,6 +410,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
result.setLog10LikelihoodOfAFzero(log10Lof0);
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
return log10Lof0;
} else {

View File

@ -34,7 +34,6 @@ public class BQSRIntegrationTest extends WalkerTest {
" -I " + bam +
" -L " + interval +
args +
" --no_plots" +
" -knownSites " + (reference.equals(b36KGReference) ? b36dbSNP129 : hg18dbSNP132) +
" -o %s";
}
@ -50,21 +49,21 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "1cfc73371abb933ca26496745d105ff0")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "ee5142776008741b1b2453b1258c6d99")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "fbc520794f0f98d52159de956f7217f1")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab5b93794049c514bf8e407019d76b67")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "81df636e3d0ed6f16113517e0169bc96")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "ad3c47355448f8c45e172c6e1129c65d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "fef7240140a9b6d6335ce009fa4edec5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "769f95b9dcc78a405d3e6b191e5a19f5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "43fcba51264cc98bd8466d21e1b96766")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "48aaf9ac54b97eac3663882a59354ab2")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "dac04b9e1e1c52af8d3a50c2e550fda9")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "90d70542076715a8605a8d4002614b34")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "26a04f5a28c40750c603cbe8a926d7bd")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "be6c7bc0b79a2d0395d21cd0154540d5")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "65781095beb41d8feca26e93e04dcc0b")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "8ee1fed1713daca1f36e8b30bee2cd23")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "9449d8a8baac742f46673e9b8314220b")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "39313c6e3b85142548fee9b6c130e7b6")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "15eae9e834ed80b24660393c6df87f85")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "8485d8fd5e780e98d720dfbf79f26528")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "c423d1d443822dae404239bb9a746b96")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "fb0a6aef430f562ed5e0002d03e0c619")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "efee7bcb89abe36da1cfd8a635d37cd2")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "0e8a3238902a1ff0f0c657fb09b4c022")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5e58d3dcf5ca38f008a64d1c0743ed83")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "1a8e5c85c7935eb1bd2203f5c86ce1db")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c423d1d443822dae404239bb9a746b96")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "6762b39dc027056365280a9d582a6713")},
};
}
@ -88,7 +87,6 @@ public class BQSRIntegrationTest extends WalkerTest {
" -R " + b36KGReference +
" -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" +
" -L 1:10,000,000-10,200,000" +
" --no_plots" +
" -o %s",
1, // just one output file
UserException.CommandLineException.class);
@ -102,7 +100,6 @@ public class BQSRIntegrationTest extends WalkerTest {
" -R " + b36KGReference +
" -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" +
" -L 1:50,000-80,000" +
" --no_plots" +
" -o %s",
1, // just one output file
UserException.class);

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
import org.testng.annotations.Test;
/**
* Created by IntelliJ IDEA.
@ -20,6 +20,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
final String REFSAMPLE_NAME = "NA12878";
final String MTINTERVALS = "MT:1-1000";
final String LSVINTERVALS = "20:40,500,000-41,000,000";
final String LSVINTERVALS_SHORT = "20:40,500,000-41,501,000";
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
@ -38,6 +39,13 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
private void PC_LSV_Test_short(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
REF, LSV_BAM, LSVINTERVALS_SHORT, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
@ -47,22 +55,26 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0ff90fa3882a3fb5089a7bba50dd8ae3");
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","54241500a8ce7df4bedab6e29099dba5");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","567ae6b2a7f839b1307d4087c2f59cca");
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","5515c6b4249505f78eb54140725e3f72");
}
@Test(enabled = true)
public void testSNP_ACS_Pools() {
PC_LSV_Test(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","90af837f372e3d5143af30bf5c8c2b75");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9514ed15c7030b6d47e04e6a3a2b0a3e");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","38599f7650a44c5ed7bdd19865483b99");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d2a22e12f1969ae199557947e5039b58");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","789a54438553179b9abec1fbe4df754c");
}
@Test(enabled = true)
@ -72,6 +84,6 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "4d16d3c9475637bad70e9dc2eafe2da2");
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "d1b48f6f3a175fcba9aec6d427005a45");
}
}

View File

@ -63,6 +63,7 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
import java.io.File;
import java.io.OutputStream;
import java.util.*;
/**

View File

@ -50,7 +50,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
private static Logger logger = Logger.getLogger(SAMFileWriterStorage.class);
public SAMFileWriterStorage( SAMFileWriterStub stub ) {
this(stub,stub.getSAMFile());
this(stub,stub.getOutputFile());
}
public SAMFileWriterStorage( SAMFileWriterStub stub, File file ) {
@ -66,7 +66,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
if(stub.getMaxRecordsInRam() != null)
factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());
if(stub.getSAMFile() != null) {
if(stub.getOutputFile() != null) {
try {
this.writer = createBAMWriter(factory,stub.getFileHeader(),stub.isPresorted(),file,stub.getCompressionLevel());
}
@ -74,8 +74,8 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
throw new UserException.CouldNotCreateOutputFile(file,"file could not be created",ex);
}
}
else if(stub.getSAMOutputStream() != null){
this.writer = factory.makeSAMWriter( stub.getFileHeader(), stub.isPresorted(), stub.getSAMOutputStream());
else if(stub.getOutputStream() != null){
this.writer = factory.makeSAMWriter( stub.getFileHeader(), stub.isPresorted(), stub.getOutputStream());
}
else
throw new UserException("Unable to write to SAM file; neither a target file nor a stream has been specified");

View File

@ -69,9 +69,9 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
* @param stub Stub to use when constructing the output file.
*/
public VariantContextWriterStorage(VariantContextWriterStub stub) {
if ( stub.getFile() != null ) {
this.file = stub.getFile();
writer = vcfWriterToFile(stub,stub.getFile(),true);
if ( stub.getOutputFile() != null ) {
this.file = stub.getOutputFile();
writer = vcfWriterToFile(stub,stub.getOutputFile(),true);
}
else if ( stub.getOutputStream() != null ) {
this.file = null;

View File

@ -150,7 +150,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
* Retrieves the SAM file to (ultimately) be created.
* @return The SAM file. Must not be null.
*/
public File getSAMFile() {
public File getOutputFile() {
return samFile;
}
@ -162,7 +162,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
simplifyBAM = v;
}
public OutputStream getSAMOutputStream() {
public OutputStream getOutputStream() {
return samOutputStream;
}
@ -220,7 +220,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
/**
* Gets whether to generate an md5 on-the-fly for this BAM.
* @return True generates the md5. False means skip writing the file.
* @param generateMD5 True generates the md5. False means skip writing the file.
*/
public void setGenerateMD5(boolean generateMD5) {
if(writeStarted)

View File

@ -27,6 +27,9 @@ package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import java.io.File;
import java.io.OutputStream;
/**
* A stub used for managing IO. Acts as a proxy for IO streams
* not yet created or streams that need significant external
@ -43,4 +46,14 @@ public interface Stub<StreamType> {
* @param outputTracker The connector used to provide an appropriate stream.
*/
public void register( OutputTracker outputTracker );
/**
* Returns the OutputStream represented by this stub or null if not available.
*/
public OutputStream getOutputStream();
/**
* Returns the File represented by this stub or null if not available.
*/
public File getOutputFile();
}

View File

@ -140,7 +140,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
* Retrieves the file to (ultimately) be created.
* @return The file. Can be null if genotypeStream is not.
*/
public File getFile() {
public File getOutputFile() {
return genotypeFile;
}
@ -148,7 +148,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
* Retrieves the output stearm to which to (ultimately) write.
* @return The file. Can be null if genotypeFile is not.
*/
public PrintStream getOutputStream() {
public OutputStream getOutputStream() {
return genotypeStream;
}
@ -196,7 +196,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) )
if ( forceBCF || (getOutputFile() != null && VariantContextWriterFactory.isBCFOutput(getOutputFile())) )
options.add(Options.FORCE_BCF);
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);
@ -271,7 +271,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
public boolean alsoWriteBCFForTest() {
return engine.getArguments().numberOfDataThreads == 1 && // only works single threaded
! isCompressed() && // for non-compressed outputs
getFile() != null && // that are going to disk
getOutputFile() != null && // that are going to disk
engine.getArguments().generateShadowBCF; // and we actually want to do it
}

View File

@ -161,6 +161,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="useAllAnnotations", shortName="all", doc="Use all possible annotations (not for the faint of heart)", required=false)
protected Boolean USE_ALL_ANNOTATIONS = false;
/**
* Note that the --list argument requires a fully resolved and correct command-line to work.
*/
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
protected Boolean LIST = false;

View File

@ -49,7 +49,6 @@ public class BQSRGatherer extends Gatherer {
@Override
public void gather(List<File> inputs, File output) {
RecalibrationReport generalReport = null;
final PrintStream outputFile;
try {
outputFile = new PrintStream(output);
@ -57,6 +56,7 @@ public class BQSRGatherer extends Gatherer {
throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE);
}
RecalibrationReport generalReport = null;
for (File input : inputs) {
final RecalibrationReport inputReport = new RecalibrationReport(input);
if (generalReport == null)
@ -70,14 +70,15 @@ public class BQSRGatherer extends Gatherer {
generalReport.calculateQuantizedQualities();
RecalibrationArgumentCollection RAC = generalReport.getRAC();
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".original");
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
}
else if (!RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".recal");
RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
if ( RAC.RECAL_PDF_FILE != null ) {
RAC.RECAL_TABLE_FILE = output;
if ( RAC.existingRecalibrationReport != null ) {
final RecalibrationReport originalReport = new RecalibrationReport(RAC.existingRecalibrationReport);
RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates());
}
else {
RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getCovariates());
}
}
generalReport.output(outputFile);

View File

@ -50,7 +50,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
@ -110,6 +110,7 @@ import java.util.ArrayList;
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long>, NanoSchedulable {
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
@ -150,7 +151,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
RecalUtils.listAvailableCovariates(logger);
System.exit(0);
}
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
RAC.existingRecalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
@ -169,6 +170,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
}
try {
RAC.RECAL_TABLE = new PrintStream(RAC.RECAL_TABLE_FILE);
} catch (IOException e) {
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e);
}
int numReadGroups = 0;
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
numReadGroups += header.getReadGroups().size();
@ -284,7 +291,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
generateReport();
logger.info("...done!");
if (!RAC.NO_PLOTS) {
if (RAC.RECAL_PDF_FILE != null) {
logger.info("Generating recalibration plots...");
generatePlots();
}
@ -296,10 +303,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
if (recalFile != null) {
RecalibrationReport report = new RecalibrationReport(recalFile);
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), recalibrationTables, requestedCovariates);
}
else
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates);
}
@ -313,14 +320,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
}
private void generateReport() {
PrintStream output;
try {
output = new PrintStream(RAC.RECAL_FILE);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
}
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates);
}
}

View File

@ -28,10 +28,10 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import java.io.File;
import java.io.PrintStream;
import java.util.Collections;
import java.util.List;
@ -59,14 +59,29 @@ public class RecalibrationArgumentCollection {
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
* and the raw empirical quality score calculated by phred-scaling the mismatch rate. Use '/dev/stdout' to print to standard out.
*/
@Gather(BQSRGatherer.class)
@Output
public File RECAL_FILE;
@Output(doc = "The output recalibration table file to create", required = true)
public File RECAL_TABLE_FILE = null;
public PrintStream RECAL_TABLE;
/**
* List all implemented covariates.
* If not provided, then no plots will be generated (useful for queue scatter/gathering).
* However, we *highly* recommend that users generate these plots whenever possible for QC checking.
*/
@Output(fullName = "plot_pdf_file", shortName = "plots", doc = "The output recalibration pdf file to create", required = false)
public File RECAL_PDF_FILE = null;
/**
* If not provided, then a temporary file is created and then deleted upon completion.
*/
@Hidden
@Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
public File RECAL_CSV_FILE = null;
/**
* Note that the --list argument requires a fully resolved and correct command-line to work.
*/
@Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
public boolean LIST_ONLY = false;
@ -166,14 +181,8 @@ public class RecalibrationArgumentCollection {
@Hidden
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
@Hidden
@Argument(fullName = "keep_intermediate_files", shortName = "k", required = false, doc ="does not remove the temporary csv file created to generate the plots")
public boolean KEEP_INTERMEDIATE_FILES = false;
@Hidden
@Argument(fullName = "no_plots", shortName = "np", required = false, doc = "does not generate any plots -- useful for queue scatter/gathering")
public boolean NO_PLOTS = false;
public File recalibrationReport = null;
public File existingRecalibrationReport = null;
public GATKReportTable generateReportTable(final String covariateNames) {
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
@ -205,12 +214,10 @@ public class RecalibrationArgumentCollection {
argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.addRowID("quantizing_levels", true);
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.addRowID("keep_intermediate_files", true);
argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.addRowID("no_plots", true);
argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, existingRecalibrationReport == null ? "null" : existingRecalibrationReport.getAbsolutePath());
argumentsTable.addRowID("plot_pdf_file", true);
argumentsTable.set("plot_pdf_file", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RECAL_PDF_FILE == null ? "null" : RECAL_PDF_FILE.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true);
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
return argumentsTable;

View File

@ -126,7 +126,9 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
@Input(fullName="goldStandard", shortName = "gold", doc="Evaluations that count calls at sites of true variation (e.g., indel calls) will use this argument as their gold standard for comparison", required=false)
public RodBinding<VariantContext> goldStandard = null;
// Help arguments
/**
* Note that the --list argument requires a fully resolved and correct command-line to work.
*/
@Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit", required=false)
protected Boolean LIST = false;

View File

@ -47,7 +47,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.*;
@ -333,8 +333,8 @@ public class RecalUtils {
return covariate.getClass().getSimpleName().split("Covariate")[0];
}
public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), RAC.RECAL_TABLE);
}
/**
@ -362,46 +362,36 @@ public class RecalUtils {
report.print(outputFile);
}
private static Pair<PrintStream, File> initializeRecalibrationPlot(File filename) {
final PrintStream deltaTableStream;
final File deltaTableFileName = new File(filename + ".csv");
try {
deltaTableStream = new PrintStream(deltaTableFileName);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(deltaTableFileName, "File " + deltaTableFileName + " could not be created");
}
return new Pair<PrintStream, File>(deltaTableStream, deltaTableFileName);
}
private static void outputRecalibrationPlot(final File gatkReportFilename, Pair<PrintStream, File> files, boolean keepIntermediates) {
final File csvFileName = files.getSecond();
final File plotFileName = new File(csvFileName + ".pdf");
files.getFirst().close();
private static void outputRecalibrationPlot(final RecalibrationArgumentCollection RAC) {
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
executor.addArgs(csvFileName.getAbsolutePath());
executor.addArgs(gatkReportFilename.getAbsolutePath());
executor.addArgs(plotFileName.getAbsolutePath());
executor.addArgs(RAC.RECAL_CSV_FILE.getAbsolutePath());
executor.addArgs(RAC.RECAL_TABLE_FILE.getAbsolutePath());
executor.addArgs(RAC.RECAL_PDF_FILE.getAbsolutePath());
executor.exec();
if (!keepIntermediates)
if (!csvFileName.delete())
throw new ReviewedStingException("Could not find file " + csvFileName.getAbsolutePath());
}
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, true);
outputRecalibrationPlot(filename, files, keepIntermediates);
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final Covariate[] requestedCovariates) {
generateRecalibrationPlot(RAC, original, null, requestedCovariates);
}
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", requestedCovariates, true);
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, false);
outputRecalibrationPlot(filename, files, keepIntermediates);
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates) {
final PrintStream csvFile;
try {
if ( RAC.RECAL_CSV_FILE == null ) {
RAC.RECAL_CSV_FILE = File.createTempFile("BQSR", ".csv");
RAC.RECAL_CSV_FILE.deleteOnExit();
}
csvFile = new PrintStream(RAC.RECAL_CSV_FILE);
} catch (IOException e) {
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_CSV_FILE, e);
}
if ( recalibrated != null )
writeCSV(csvFile, recalibrated, "RECALIBRATED", requestedCovariates, true);
writeCSV(csvFile, original, "ORIGINAL", requestedCovariates, recalibrated == null);
outputRecalibrationPlot(RAC);
}
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {

View File

@ -284,14 +284,11 @@ public class RecalibrationReport {
else if (argument.equals("quantizing_levels"))
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
else if (argument.equals("keep_intermediate_files"))
RAC.KEEP_INTERMEDIATE_FILES = Boolean.parseBoolean((String) value);
else if (argument.equals("no_plots"))
RAC.NO_PLOTS = Boolean.parseBoolean((String) value);
else if (argument.equals("recalibration_report"))
RAC.recalibrationReport = (value == null) ? null : new File((String) value);
RAC.existingRecalibrationReport = (value == null) ? null : new File((String) value);
else if (argument.equals("plot_pdf_file"))
RAC.RECAL_PDF_FILE = (value == null) ? null : new File((String) value);
else if (argument.equals("binary_tag_name"))
RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;