Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
ccc3f4ff8d
|
|
@ -195,8 +195,8 @@ public class ErrorModel {
|
|||
if (eventLength < 0 && pileupElement.isBeforeDeletionStart() && pileupElement.getEventLength() == -eventLength)
|
||||
return true;
|
||||
|
||||
if (eventLength > 0 && pileupElement.isBeforeInsertion() &&
|
||||
Arrays.equals(pileupElement.getEventBases().getBytes(),alleleBases))
|
||||
if (eventLength > 0 && pileupElement.isBeforeInsertion() &&
|
||||
Arrays.equals(pileupElement.getEventBases().getBytes(),Arrays.copyOfRange(alleleBases,1,alleleBases.length))) // allele contains ref byte, but pileupElement's event bases doesn't
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -199,6 +199,8 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
|
|||
numAlleles, log10AlleleFrequencyPriors, result);
|
||||
combinedPloidy = ploidyPerPool + combinedPloidy; // total number of chromosomes in combinedLikelihoods
|
||||
}
|
||||
|
||||
int k=0;
|
||||
}
|
||||
|
||||
public static CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool, double[] newGL, int originalPloidy, int newGLPloidy, int numAlleles,
|
||||
|
|
@ -408,6 +410,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
|
|||
|
||||
result.setLog10LikelihoodOfAFzero(log10Lof0);
|
||||
result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
|
||||
return log10Lof0;
|
||||
|
||||
} else {
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
" -I " + bam +
|
||||
" -L " + interval +
|
||||
args +
|
||||
" --no_plots" +
|
||||
" -knownSites " + (reference.equals(b36KGReference) ? b36dbSNP129 : hg18dbSNP132) +
|
||||
" -o %s";
|
||||
}
|
||||
|
|
@ -50,21 +49,21 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
|
||||
String HiSeqInterval = "chr1:10,000,000-10,100,000";
|
||||
return new Object[][]{
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "1cfc73371abb933ca26496745d105ff0")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "ee5142776008741b1b2453b1258c6d99")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "fbc520794f0f98d52159de956f7217f1")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab5b93794049c514bf8e407019d76b67")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "81df636e3d0ed6f16113517e0169bc96")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "ad3c47355448f8c45e172c6e1129c65d")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "fef7240140a9b6d6335ce009fa4edec5")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "769f95b9dcc78a405d3e6b191e5a19f5")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "43fcba51264cc98bd8466d21e1b96766")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "48aaf9ac54b97eac3663882a59354ab2")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "dac04b9e1e1c52af8d3a50c2e550fda9")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "90d70542076715a8605a8d4002614b34")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "26a04f5a28c40750c603cbe8a926d7bd")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "be6c7bc0b79a2d0395d21cd0154540d5")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "65781095beb41d8feca26e93e04dcc0b")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "8ee1fed1713daca1f36e8b30bee2cd23")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "9449d8a8baac742f46673e9b8314220b")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "39313c6e3b85142548fee9b6c130e7b6")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "15eae9e834ed80b24660393c6df87f85")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "8485d8fd5e780e98d720dfbf79f26528")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "c423d1d443822dae404239bb9a746b96")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "fb0a6aef430f562ed5e0002d03e0c619")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "efee7bcb89abe36da1cfd8a635d37cd2")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "0e8a3238902a1ff0f0c657fb09b4c022")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5e58d3dcf5ca38f008a64d1c0743ed83")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "1a8e5c85c7935eb1bd2203f5c86ce1db")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c423d1d443822dae404239bb9a746b96")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "6762b39dc027056365280a9d582a6713")},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -88,7 +87,6 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
" -R " + b36KGReference +
|
||||
" -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" +
|
||||
" -L 1:10,000,000-10,200,000" +
|
||||
" --no_plots" +
|
||||
" -o %s",
|
||||
1, // just one output file
|
||||
UserException.CommandLineException.class);
|
||||
|
|
@ -102,7 +100,6 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
" -R " + b36KGReference +
|
||||
" -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" +
|
||||
" -L 1:50,000-80,000" +
|
||||
" --no_plots" +
|
||||
" -o %s",
|
||||
1, // just one output file
|
||||
UserException.class);
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -20,6 +20,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
final String REFSAMPLE_NAME = "NA12878";
|
||||
final String MTINTERVALS = "MT:1-1000";
|
||||
final String LSVINTERVALS = "20:40,500,000-41,000,000";
|
||||
final String LSVINTERVALS_SHORT = "20:40,500,000-41,501,000";
|
||||
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
|
||||
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
|
||||
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
|
||||
|
|
@ -38,6 +39,13 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
private void PC_LSV_Test_short(String args, String name, String model, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
|
||||
REF, LSV_BAM, LSVINTERVALS_SHORT, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
|
||||
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
|
||||
|
|
@ -47,22 +55,26 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testBOTH_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0ff90fa3882a3fb5089a7bba50dd8ae3");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","54241500a8ce7df4bedab6e29099dba5");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","567ae6b2a7f839b1307d4087c2f59cca");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","5515c6b4249505f78eb54140725e3f72");
|
||||
}
|
||||
@Test(enabled = true)
|
||||
public void testSNP_ACS_Pools() {
|
||||
PC_LSV_Test(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","90af837f372e3d5143af30bf5c8c2b75");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9514ed15c7030b6d47e04e6a3a2b0a3e");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","38599f7650a44c5ed7bdd19865483b99");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d2a22e12f1969ae199557947e5039b58");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","789a54438553179b9abec1fbe4df754c");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -72,6 +84,6 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "4d16d3c9475637bad70e9dc2eafe2da2");
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "d1b48f6f3a175fcba9aec6d427005a45");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
|||
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
private static Logger logger = Logger.getLogger(SAMFileWriterStorage.class);
|
||||
|
||||
public SAMFileWriterStorage( SAMFileWriterStub stub ) {
|
||||
this(stub,stub.getSAMFile());
|
||||
this(stub,stub.getOutputFile());
|
||||
}
|
||||
|
||||
public SAMFileWriterStorage( SAMFileWriterStub stub, File file ) {
|
||||
|
|
@ -66,7 +66,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
if(stub.getMaxRecordsInRam() != null)
|
||||
factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());
|
||||
|
||||
if(stub.getSAMFile() != null) {
|
||||
if(stub.getOutputFile() != null) {
|
||||
try {
|
||||
this.writer = createBAMWriter(factory,stub.getFileHeader(),stub.isPresorted(),file,stub.getCompressionLevel());
|
||||
}
|
||||
|
|
@ -74,8 +74,8 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
throw new UserException.CouldNotCreateOutputFile(file,"file could not be created",ex);
|
||||
}
|
||||
}
|
||||
else if(stub.getSAMOutputStream() != null){
|
||||
this.writer = factory.makeSAMWriter( stub.getFileHeader(), stub.isPresorted(), stub.getSAMOutputStream());
|
||||
else if(stub.getOutputStream() != null){
|
||||
this.writer = factory.makeSAMWriter( stub.getFileHeader(), stub.isPresorted(), stub.getOutputStream());
|
||||
}
|
||||
else
|
||||
throw new UserException("Unable to write to SAM file; neither a target file nor a stream has been specified");
|
||||
|
|
|
|||
|
|
@ -69,9 +69,9 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
* @param stub Stub to use when constructing the output file.
|
||||
*/
|
||||
public VariantContextWriterStorage(VariantContextWriterStub stub) {
|
||||
if ( stub.getFile() != null ) {
|
||||
this.file = stub.getFile();
|
||||
writer = vcfWriterToFile(stub,stub.getFile(),true);
|
||||
if ( stub.getOutputFile() != null ) {
|
||||
this.file = stub.getOutputFile();
|
||||
writer = vcfWriterToFile(stub,stub.getOutputFile(),true);
|
||||
}
|
||||
else if ( stub.getOutputStream() != null ) {
|
||||
this.file = null;
|
||||
|
|
|
|||
|
|
@ -150,7 +150,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
* Retrieves the SAM file to (ultimately) be created.
|
||||
* @return The SAM file. Must not be null.
|
||||
*/
|
||||
public File getSAMFile() {
|
||||
public File getOutputFile() {
|
||||
return samFile;
|
||||
}
|
||||
|
||||
|
|
@ -162,7 +162,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
simplifyBAM = v;
|
||||
}
|
||||
|
||||
public OutputStream getSAMOutputStream() {
|
||||
public OutputStream getOutputStream() {
|
||||
return samOutputStream;
|
||||
}
|
||||
|
||||
|
|
@ -220,7 +220,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
|
||||
/**
|
||||
* Gets whether to generate an md5 on-the-fly for this BAM.
|
||||
* @return True generates the md5. False means skip writing the file.
|
||||
* @param generateMD5 True generates the md5. False means skip writing the file.
|
||||
*/
|
||||
public void setGenerateMD5(boolean generateMD5) {
|
||||
if(writeStarted)
|
||||
|
|
|
|||
|
|
@ -27,6 +27,9 @@ package org.broadinstitute.sting.gatk.io.stubs;
|
|||
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* A stub used for managing IO. Acts as a proxy for IO streams
|
||||
* not yet created or streams that need significant external
|
||||
|
|
@ -43,4 +46,14 @@ public interface Stub<StreamType> {
|
|||
* @param outputTracker The connector used to provide an appropriate stream.
|
||||
*/
|
||||
public void register( OutputTracker outputTracker );
|
||||
|
||||
/**
|
||||
* Returns the OutputStream represented by this stub or null if not available.
|
||||
*/
|
||||
public OutputStream getOutputStream();
|
||||
|
||||
/**
|
||||
* Returns the File represented by this stub or null if not available.
|
||||
*/
|
||||
public File getOutputFile();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -140,7 +140,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
* Retrieves the file to (ultimately) be created.
|
||||
* @return The file. Can be null if genotypeStream is not.
|
||||
*/
|
||||
public File getFile() {
|
||||
public File getOutputFile() {
|
||||
return genotypeFile;
|
||||
}
|
||||
|
||||
|
|
@ -148,7 +148,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
* Retrieves the output stearm to which to (ultimately) write.
|
||||
* @return The file. Can be null if genotypeFile is not.
|
||||
*/
|
||||
public PrintStream getOutputStream() {
|
||||
public OutputStream getOutputStream() {
|
||||
return genotypeStream;
|
||||
}
|
||||
|
||||
|
|
@ -196,7 +196,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
|
||||
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
|
||||
|
||||
if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) )
|
||||
if ( forceBCF || (getOutputFile() != null && VariantContextWriterFactory.isBCFOutput(getOutputFile())) )
|
||||
options.add(Options.FORCE_BCF);
|
||||
|
||||
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);
|
||||
|
|
@ -271,7 +271,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
public boolean alsoWriteBCFForTest() {
|
||||
return engine.getArguments().numberOfDataThreads == 1 && // only works single threaded
|
||||
! isCompressed() && // for non-compressed outputs
|
||||
getFile() != null && // that are going to disk
|
||||
getOutputFile() != null && // that are going to disk
|
||||
engine.getArguments().generateShadowBCF; // and we actually want to do it
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -161,6 +161,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
@Argument(fullName="useAllAnnotations", shortName="all", doc="Use all possible annotations (not for the faint of heart)", required=false)
|
||||
protected Boolean USE_ALL_ANNOTATIONS = false;
|
||||
|
||||
/**
|
||||
* Note that the --list argument requires a fully resolved and correct command-line to work.
|
||||
*/
|
||||
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
|
||||
protected Boolean LIST = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,6 @@ public class BQSRGatherer extends Gatherer {
|
|||
|
||||
@Override
|
||||
public void gather(List<File> inputs, File output) {
|
||||
RecalibrationReport generalReport = null;
|
||||
final PrintStream outputFile;
|
||||
try {
|
||||
outputFile = new PrintStream(output);
|
||||
|
|
@ -57,6 +56,7 @@ public class BQSRGatherer extends Gatherer {
|
|||
throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE);
|
||||
}
|
||||
|
||||
RecalibrationReport generalReport = null;
|
||||
for (File input : inputs) {
|
||||
final RecalibrationReport inputReport = new RecalibrationReport(input);
|
||||
if (generalReport == null)
|
||||
|
|
@ -70,14 +70,15 @@ public class BQSRGatherer extends Gatherer {
|
|||
generalReport.calculateQuantizedQualities();
|
||||
|
||||
RecalibrationArgumentCollection RAC = generalReport.getRAC();
|
||||
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
|
||||
final File recal_out = new File(output.getName() + ".original");
|
||||
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
|
||||
RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
else if (!RAC.NO_PLOTS) {
|
||||
final File recal_out = new File(output.getName() + ".recal");
|
||||
RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
if ( RAC.RECAL_PDF_FILE != null ) {
|
||||
RAC.RECAL_TABLE_FILE = output;
|
||||
if ( RAC.existingRecalibrationReport != null ) {
|
||||
final RecalibrationReport originalReport = new RecalibrationReport(RAC.existingRecalibrationReport);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates());
|
||||
}
|
||||
else {
|
||||
RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getCovariates());
|
||||
}
|
||||
}
|
||||
|
||||
generalReport.output(outputFile);
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -110,6 +110,7 @@ import java.util.ArrayList;
|
|||
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
|
||||
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
|
||||
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long>, NanoSchedulable {
|
||||
|
||||
@ArgumentCollection
|
||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
|
||||
|
||||
|
|
@ -150,7 +151,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
RecalUtils.listAvailableCovariates(logger);
|
||||
System.exit(0);
|
||||
}
|
||||
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
|
||||
RAC.existingRecalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
|
||||
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||
|
|
@ -169,6 +170,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
}
|
||||
|
||||
try {
|
||||
RAC.RECAL_TABLE = new PrintStream(RAC.RECAL_TABLE_FILE);
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e);
|
||||
}
|
||||
|
||||
int numReadGroups = 0;
|
||||
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
|
||||
numReadGroups += header.getReadGroups().size();
|
||||
|
|
@ -284,7 +291,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
generateReport();
|
||||
logger.info("...done!");
|
||||
|
||||
if (!RAC.NO_PLOTS) {
|
||||
if (RAC.RECAL_PDF_FILE != null) {
|
||||
logger.info("Generating recalibration plots...");
|
||||
generatePlots();
|
||||
}
|
||||
|
|
@ -296,10 +303,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
|
||||
if (recalFile != null) {
|
||||
RecalibrationReport report = new RecalibrationReport(recalFile);
|
||||
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), recalibrationTables, requestedCovariates);
|
||||
}
|
||||
else
|
||||
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -313,14 +320,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
}
|
||||
|
||||
private void generateReport() {
|
||||
PrintStream output;
|
||||
try {
|
||||
output = new PrintStream(RAC.RECAL_FILE);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
|
||||
}
|
||||
|
||||
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
|
||||
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,10 +28,10 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -59,14 +59,29 @@ public class RecalibrationArgumentCollection {
|
|||
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
|
||||
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
|
||||
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
||||
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
||||
* and the raw empirical quality score calculated by phred-scaling the mismatch rate. Use '/dev/stdout' to print to standard out.
|
||||
*/
|
||||
@Gather(BQSRGatherer.class)
|
||||
@Output
|
||||
public File RECAL_FILE;
|
||||
@Output(doc = "The output recalibration table file to create", required = true)
|
||||
public File RECAL_TABLE_FILE = null;
|
||||
public PrintStream RECAL_TABLE;
|
||||
|
||||
/**
|
||||
* List all implemented covariates.
|
||||
* If not provided, then no plots will be generated (useful for queue scatter/gathering).
|
||||
* However, we *highly* recommend that users generate these plots whenever possible for QC checking.
|
||||
*/
|
||||
@Output(fullName = "plot_pdf_file", shortName = "plots", doc = "The output recalibration pdf file to create", required = false)
|
||||
public File RECAL_PDF_FILE = null;
|
||||
|
||||
/**
|
||||
* If not provided, then a temporary file is created and then deleted upon completion.
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
|
||||
public File RECAL_CSV_FILE = null;
|
||||
|
||||
/**
|
||||
* Note that the --list argument requires a fully resolved and correct command-line to work.
|
||||
*/
|
||||
@Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
|
||||
public boolean LIST_ONLY = false;
|
||||
|
|
@ -166,14 +181,8 @@ public class RecalibrationArgumentCollection {
|
|||
@Hidden
|
||||
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String FORCE_PLATFORM = null;
|
||||
@Hidden
|
||||
@Argument(fullName = "keep_intermediate_files", shortName = "k", required = false, doc ="does not remove the temporary csv file created to generate the plots")
|
||||
public boolean KEEP_INTERMEDIATE_FILES = false;
|
||||
@Hidden
|
||||
@Argument(fullName = "no_plots", shortName = "np", required = false, doc = "does not generate any plots -- useful for queue scatter/gathering")
|
||||
public boolean NO_PLOTS = false;
|
||||
|
||||
public File recalibrationReport = null;
|
||||
public File existingRecalibrationReport = null;
|
||||
|
||||
public GATKReportTable generateReportTable(final String covariateNames) {
|
||||
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
|
||||
|
|
@ -205,12 +214,10 @@ public class RecalibrationArgumentCollection {
|
|||
argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
|
||||
argumentsTable.addRowID("quantizing_levels", true);
|
||||
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
||||
argumentsTable.addRowID("keep_intermediate_files", true);
|
||||
argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
|
||||
argumentsTable.addRowID("no_plots", true);
|
||||
argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
|
||||
argumentsTable.addRowID("recalibration_report", true);
|
||||
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
|
||||
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, existingRecalibrationReport == null ? "null" : existingRecalibrationReport.getAbsolutePath());
|
||||
argumentsTable.addRowID("plot_pdf_file", true);
|
||||
argumentsTable.set("plot_pdf_file", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RECAL_PDF_FILE == null ? "null" : RECAL_PDF_FILE.getAbsolutePath());
|
||||
argumentsTable.addRowID("binary_tag_name", true);
|
||||
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
|
||||
return argumentsTable;
|
||||
|
|
|
|||
|
|
@ -126,7 +126,9 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
@Input(fullName="goldStandard", shortName = "gold", doc="Evaluations that count calls at sites of true variation (e.g., indel calls) will use this argument as their gold standard for comparison", required=false)
|
||||
public RodBinding<VariantContext> goldStandard = null;
|
||||
|
||||
// Help arguments
|
||||
/**
|
||||
* Note that the --list argument requires a fully resolved and correct command-line to work.
|
||||
*/
|
||||
@Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit", required=false)
|
||||
protected Boolean LIST = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -333,8 +333,8 @@ public class RecalUtils {
|
|||
return covariate.getClass().getSimpleName().split("Covariate")[0];
|
||||
}
|
||||
|
||||
public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
|
||||
outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
|
||||
public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
|
||||
outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), RAC.RECAL_TABLE);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -362,46 +362,36 @@ public class RecalUtils {
|
|||
report.print(outputFile);
|
||||
}
|
||||
|
||||
private static Pair<PrintStream, File> initializeRecalibrationPlot(File filename) {
|
||||
final PrintStream deltaTableStream;
|
||||
final File deltaTableFileName = new File(filename + ".csv");
|
||||
try {
|
||||
deltaTableStream = new PrintStream(deltaTableFileName);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(deltaTableFileName, "File " + deltaTableFileName + " could not be created");
|
||||
}
|
||||
return new Pair<PrintStream, File>(deltaTableStream, deltaTableFileName);
|
||||
}
|
||||
|
||||
private static void outputRecalibrationPlot(final File gatkReportFilename, Pair<PrintStream, File> files, boolean keepIntermediates) {
|
||||
final File csvFileName = files.getSecond();
|
||||
final File plotFileName = new File(csvFileName + ".pdf");
|
||||
files.getFirst().close();
|
||||
private static void outputRecalibrationPlot(final RecalibrationArgumentCollection RAC) {
|
||||
|
||||
final RScriptExecutor executor = new RScriptExecutor();
|
||||
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
|
||||
executor.addArgs(csvFileName.getAbsolutePath());
|
||||
executor.addArgs(gatkReportFilename.getAbsolutePath());
|
||||
executor.addArgs(plotFileName.getAbsolutePath());
|
||||
executor.addArgs(RAC.RECAL_CSV_FILE.getAbsolutePath());
|
||||
executor.addArgs(RAC.RECAL_TABLE_FILE.getAbsolutePath());
|
||||
executor.addArgs(RAC.RECAL_PDF_FILE.getAbsolutePath());
|
||||
executor.exec();
|
||||
|
||||
if (!keepIntermediates)
|
||||
if (!csvFileName.delete())
|
||||
throw new ReviewedStingException("Could not find file " + csvFileName.getAbsolutePath());
|
||||
|
||||
}
|
||||
|
||||
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
|
||||
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, true);
|
||||
outputRecalibrationPlot(filename, files, keepIntermediates);
|
||||
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final Covariate[] requestedCovariates) {
|
||||
generateRecalibrationPlot(RAC, original, null, requestedCovariates);
|
||||
}
|
||||
|
||||
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
|
||||
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", requestedCovariates, true);
|
||||
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, false);
|
||||
outputRecalibrationPlot(filename, files, keepIntermediates);
|
||||
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates) {
|
||||
final PrintStream csvFile;
|
||||
try {
|
||||
if ( RAC.RECAL_CSV_FILE == null ) {
|
||||
RAC.RECAL_CSV_FILE = File.createTempFile("BQSR", ".csv");
|
||||
RAC.RECAL_CSV_FILE.deleteOnExit();
|
||||
}
|
||||
csvFile = new PrintStream(RAC.RECAL_CSV_FILE);
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_CSV_FILE, e);
|
||||
}
|
||||
|
||||
if ( recalibrated != null )
|
||||
writeCSV(csvFile, recalibrated, "RECALIBRATED", requestedCovariates, true);
|
||||
writeCSV(csvFile, original, "ORIGINAL", requestedCovariates, recalibrated == null);
|
||||
outputRecalibrationPlot(RAC);
|
||||
}
|
||||
|
||||
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {
|
||||
|
|
|
|||
|
|
@ -284,14 +284,11 @@ public class RecalibrationReport {
|
|||
else if (argument.equals("quantizing_levels"))
|
||||
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
|
||||
|
||||
else if (argument.equals("keep_intermediate_files"))
|
||||
RAC.KEEP_INTERMEDIATE_FILES = Boolean.parseBoolean((String) value);
|
||||
|
||||
else if (argument.equals("no_plots"))
|
||||
RAC.NO_PLOTS = Boolean.parseBoolean((String) value);
|
||||
|
||||
else if (argument.equals("recalibration_report"))
|
||||
RAC.recalibrationReport = (value == null) ? null : new File((String) value);
|
||||
RAC.existingRecalibrationReport = (value == null) ? null : new File((String) value);
|
||||
|
||||
else if (argument.equals("plot_pdf_file"))
|
||||
RAC.RECAL_PDF_FILE = (value == null) ? null : new File((String) value);
|
||||
|
||||
else if (argument.equals("binary_tag_name"))
|
||||
RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;
|
||||
|
|
|
|||
Loading…
Reference in New Issue