Merge fix

This commit is contained in:
Guillermo del Angel 2012-09-14 14:05:21 -04:00
commit ca010160a9
33 changed files with 366 additions and 191 deletions

View File

@ -34,7 +34,6 @@ public class BQSRIntegrationTest extends WalkerTest {
" -I " + bam +
" -L " + interval +
args +
" --no_plots" +
" -knownSites " + (reference.equals(b36KGReference) ? b36dbSNP129 : hg18dbSNP132) +
" -o %s";
}
@ -50,21 +49,21 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "1cfc73371abb933ca26496745d105ff0")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "ee5142776008741b1b2453b1258c6d99")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "fbc520794f0f98d52159de956f7217f1")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab5b93794049c514bf8e407019d76b67")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "81df636e3d0ed6f16113517e0169bc96")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "ad3c47355448f8c45e172c6e1129c65d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "fef7240140a9b6d6335ce009fa4edec5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "769f95b9dcc78a405d3e6b191e5a19f5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "43fcba51264cc98bd8466d21e1b96766")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "48aaf9ac54b97eac3663882a59354ab2")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "dac04b9e1e1c52af8d3a50c2e550fda9")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "90d70542076715a8605a8d4002614b34")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "26a04f5a28c40750c603cbe8a926d7bd")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "be6c7bc0b79a2d0395d21cd0154540d5")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "65781095beb41d8feca26e93e04dcc0b")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "8ee1fed1713daca1f36e8b30bee2cd23")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "9449d8a8baac742f46673e9b8314220b")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "39313c6e3b85142548fee9b6c130e7b6")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "15eae9e834ed80b24660393c6df87f85")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "8485d8fd5e780e98d720dfbf79f26528")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "c423d1d443822dae404239bb9a746b96")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "fb0a6aef430f562ed5e0002d03e0c619")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "efee7bcb89abe36da1cfd8a635d37cd2")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "0e8a3238902a1ff0f0c657fb09b4c022")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5e58d3dcf5ca38f008a64d1c0743ed83")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "1a8e5c85c7935eb1bd2203f5c86ce1db")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "c423d1d443822dae404239bb9a746b96")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "6762b39dc027056365280a9d582a6713")},
};
}
@ -88,7 +87,6 @@ public class BQSRIntegrationTest extends WalkerTest {
" -R " + b36KGReference +
" -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" +
" -L 1:10,000,000-10,200,000" +
" --no_plots" +
" -o %s",
1, // just one output file
UserException.CommandLineException.class);
@ -102,7 +100,6 @@ public class BQSRIntegrationTest extends WalkerTest {
" -R " + b36KGReference +
" -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" +
" -L 1:50,000-80,000" +
" --no_plots" +
" -o %s",
1, // just one output file
UserException.class);

View File

@ -73,4 +73,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("8d092b25f40456e618eef91fdce8adf0"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@Test
public void HCTestStructuralIndels() {
    // Two small structural-indel regions on chr20, exercised with deeper pruning (-minPruning 6).
    final String intervals = " -L 20:8187565-8187800 -L 20:18670537-18670730";
    final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam")
            + " --no_cmdline_in_header -o %s -minPruning 6" + intervals;
    // Expected MD5 pins the VCF output for regression detection.
    final WalkerTestSpec spec = new WalkerTestSpec(commandLine, Arrays.asList("c29e61810c056b52a47baae0696931ea"));
    executeTest("HCTestStructuralIndels: ", spec);
}
}

View File

@ -63,6 +63,7 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
import java.io.File;
import java.io.OutputStream;
import java.util.*;
/**

View File

@ -50,7 +50,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
private static Logger logger = Logger.getLogger(SAMFileWriterStorage.class);
public SAMFileWriterStorage( SAMFileWriterStub stub ) {
this(stub,stub.getSAMFile());
this(stub,stub.getOutputFile());
}
public SAMFileWriterStorage( SAMFileWriterStub stub, File file ) {
@ -66,7 +66,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
if(stub.getMaxRecordsInRam() != null)
factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());
if(stub.getSAMFile() != null) {
if(stub.getOutputFile() != null) {
try {
this.writer = createBAMWriter(factory,stub.getFileHeader(),stub.isPresorted(),file,stub.getCompressionLevel());
}
@ -74,8 +74,8 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
throw new UserException.CouldNotCreateOutputFile(file,"file could not be created",ex);
}
}
else if(stub.getSAMOutputStream() != null){
this.writer = factory.makeSAMWriter( stub.getFileHeader(), stub.isPresorted(), stub.getSAMOutputStream());
else if(stub.getOutputStream() != null){
this.writer = factory.makeSAMWriter( stub.getFileHeader(), stub.isPresorted(), stub.getOutputStream());
}
else
throw new UserException("Unable to write to SAM file; neither a target file nor a stream has been specified");

View File

@ -69,9 +69,9 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
* @param stub Stub to use when constructing the output file.
*/
public VariantContextWriterStorage(VariantContextWriterStub stub) {
if ( stub.getFile() != null ) {
this.file = stub.getFile();
writer = vcfWriterToFile(stub,stub.getFile(),true);
if ( stub.getOutputFile() != null ) {
this.file = stub.getOutputFile();
writer = vcfWriterToFile(stub,stub.getOutputFile(),true);
}
else if ( stub.getOutputStream() != null ) {
this.file = null;

View File

@ -150,7 +150,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
* Retrieves the SAM file to (ultimately) be created.
* @return The SAM file. Must not be null.
*/
public File getSAMFile() {
public File getOutputFile() {
return samFile;
}
@ -162,7 +162,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
simplifyBAM = v;
}
public OutputStream getSAMOutputStream() {
public OutputStream getOutputStream() {
return samOutputStream;
}
@ -220,7 +220,7 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
/**
* Sets whether to generate an md5 on-the-fly for this BAM.
* @return True generates the md5. False means skip writing the file.
* @param generateMD5 True generates the md5. False means skip writing the file.
*/
public void setGenerateMD5(boolean generateMD5) {
if(writeStarted)

View File

@ -27,6 +27,9 @@ package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import java.io.File;
import java.io.OutputStream;
/**
* A stub used for managing IO. Acts as a proxy for IO streams
* not yet created or streams that need significant external
@ -43,4 +46,14 @@ public interface Stub<StreamType> {
* @param outputTracker The connector used to provide an appropriate stream.
*/
public void register( OutputTracker outputTracker );
/**
* Returns the OutputStream represented by this stub or null if not available.
*/
public OutputStream getOutputStream();
/**
* Returns the File represented by this stub or null if not available.
*/
public File getOutputFile();
}

View File

@ -140,7 +140,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
* Retrieves the file to (ultimately) be created.
* @return The file. Can be null if genotypeStream is not.
*/
public File getFile() {
public File getOutputFile() {
return genotypeFile;
}
@ -148,7 +148,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
* Retrieves the output stream to which to (ultimately) write.
* @return The file. Can be null if genotypeFile is not.
*/
public PrintStream getOutputStream() {
public OutputStream getOutputStream() {
return genotypeStream;
}
@ -196,7 +196,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) )
if ( forceBCF || (getOutputFile() != null && VariantContextWriterFactory.isBCFOutput(getOutputFile())) )
options.add(Options.FORCE_BCF);
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);
@ -271,7 +271,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
public boolean alsoWriteBCFForTest() {
return engine.getArguments().numberOfDataThreads == 1 && // only works single threaded
! isCompressed() && // for non-compressed outputs
getFile() != null && // that are going to disk
getOutputFile() != null && // that are going to disk
engine.getArguments().generateShadowBCF; // and we actually want to do it
}

View File

@ -161,6 +161,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="useAllAnnotations", shortName="all", doc="Use all possible annotations (not for the faint of heart)", required=false)
protected Boolean USE_ALL_ANNOTATIONS = false;
/**
* Note that the --list argument requires a fully resolved and correct command-line to work.
*/
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
protected Boolean LIST = false;

View File

@ -49,7 +49,6 @@ public class BQSRGatherer extends Gatherer {
@Override
public void gather(List<File> inputs, File output) {
RecalibrationReport generalReport = null;
final PrintStream outputFile;
try {
outputFile = new PrintStream(output);
@ -57,6 +56,7 @@ public class BQSRGatherer extends Gatherer {
throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE);
}
RecalibrationReport generalReport = null;
for (File input : inputs) {
final RecalibrationReport inputReport = new RecalibrationReport(input);
if (generalReport == null)
@ -70,14 +70,15 @@ public class BQSRGatherer extends Gatherer {
generalReport.calculateQuantizedQualities();
RecalibrationArgumentCollection RAC = generalReport.getRAC();
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".original");
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
}
else if (!RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".recal");
RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
if ( RAC.RECAL_PDF_FILE != null ) {
RAC.RECAL_TABLE_FILE = output;
if ( RAC.existingRecalibrationReport != null ) {
final RecalibrationReport originalReport = new RecalibrationReport(RAC.existingRecalibrationReport);
RecalUtils.generateRecalibrationPlot(RAC, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates());
}
else {
RecalUtils.generateRecalibrationPlot(RAC, generalReport.getRecalibrationTables(), generalReport.getCovariates());
}
}
generalReport.output(outputFile);

View File

@ -50,7 +50,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
@ -110,6 +110,7 @@ import java.util.ArrayList;
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long>, NanoSchedulable {
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
@ -150,7 +151,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
RecalUtils.listAvailableCovariates(logger);
System.exit(0);
}
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
RAC.existingRecalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
@ -169,6 +170,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
}
try {
RAC.RECAL_TABLE = new PrintStream(RAC.RECAL_TABLE_FILE);
} catch (IOException e) {
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e);
}
int numReadGroups = 0;
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
numReadGroups += header.getReadGroups().size();
@ -284,7 +291,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
generateReport();
logger.info("...done!");
if (!RAC.NO_PLOTS) {
if (RAC.RECAL_PDF_FILE != null) {
logger.info("Generating recalibration plots...");
generatePlots();
}
@ -296,10 +303,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
if (recalFile != null) {
RecalibrationReport report = new RecalibrationReport(recalFile);
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), recalibrationTables, requestedCovariates);
}
else
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates);
}
@ -313,14 +320,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
}
private void generateReport() {
PrintStream output;
try {
output = new PrintStream(RAC.RECAL_FILE);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
}
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates);
}
}

View File

@ -28,10 +28,10 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import java.io.File;
import java.io.PrintStream;
import java.util.Collections;
import java.util.List;
@ -59,14 +59,29 @@ public class RecalibrationArgumentCollection {
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
* and the raw empirical quality score calculated by phred-scaling the mismatch rate. Use '/dev/stdout' to print to standard out.
*/
@Gather(BQSRGatherer.class)
@Output
public File RECAL_FILE;
@Output(doc = "The output recalibration table file to create", required = true)
public File RECAL_TABLE_FILE = null;
public PrintStream RECAL_TABLE;
/**
* List all implemented covariates.
* If not provided, then no plots will be generated (useful for queue scatter/gathering).
* However, we *highly* recommend that users generate these plots whenever possible for QC checking.
*/
@Output(fullName = "plot_pdf_file", shortName = "plots", doc = "The output recalibration pdf file to create", required = false)
public File RECAL_PDF_FILE = null;
/**
* If not provided, then a temporary file is created and then deleted upon completion.
*/
@Hidden
@Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
public File RECAL_CSV_FILE = null;
/**
* Note that the --list argument requires a fully resolved and correct command-line to work.
*/
@Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
public boolean LIST_ONLY = false;
@ -166,14 +181,8 @@ public class RecalibrationArgumentCollection {
@Hidden
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
@Hidden
@Argument(fullName = "keep_intermediate_files", shortName = "k", required = false, doc ="does not remove the temporary csv file created to generate the plots")
public boolean KEEP_INTERMEDIATE_FILES = false;
@Hidden
@Argument(fullName = "no_plots", shortName = "np", required = false, doc = "does not generate any plots -- useful for queue scatter/gathering")
public boolean NO_PLOTS = false;
public File recalibrationReport = null;
public File existingRecalibrationReport = null;
public GATKReportTable generateReportTable(final String covariateNames) {
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
@ -205,12 +214,10 @@ public class RecalibrationArgumentCollection {
argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.addRowID("quantizing_levels", true);
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.addRowID("keep_intermediate_files", true);
argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.addRowID("no_plots", true);
argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, existingRecalibrationReport == null ? "null" : existingRecalibrationReport.getAbsolutePath());
argumentsTable.addRowID("plot_pdf_file", true);
argumentsTable.set("plot_pdf_file", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RECAL_PDF_FILE == null ? "null" : RECAL_PDF_FILE.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true);
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
return argumentsTable;

View File

@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -198,23 +197,7 @@ public class UnifiedGenotyperEngine {
}
}
return addMissingSamples(results, allSamples);
}
private List<VariantCallContext> addMissingSamples(final List<VariantCallContext> calls, final Set<String> allSamples) {
if ( calls.isEmpty() || allSamples == null ) return calls;
final List<VariantCallContext> withAllSamples = new ArrayList<VariantCallContext>(calls.size());
for ( final VariantCallContext call : calls ) {
if ( call == null )
withAllSamples.add(null);
else {
final VariantContext withoutMissing = VariantContextUtils.addMissingSamples(call, allSamples);
withAllSamples.add(new VariantCallContext(withoutMissing, call.confidentlyCalled, call.shouldEmit));
}
}
return withAllSamples;
return results;
}
/**

View File

@ -126,7 +126,9 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
@Input(fullName="goldStandard", shortName = "gold", doc="Evaluations that count calls at sites of true variation (e.g., indel calls) will use this argument as their gold standard for comparison", required=false)
public RodBinding<VariantContext> goldStandard = null;
// Help arguments
/**
* Note that the --list argument requires a fully resolved and correct command-line to work.
*/
@Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit", required=false)
protected Boolean LIST = false;

View File

@ -313,7 +313,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
VariantContextUtils.calculateChromosomeCounts(builder, false);
if ( minimalVCF )
VariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
vcfWriter.add(VariantContextUtils.addMissingSamples(builder.make(), samples));
vcfWriter.add(builder.make());
}
return vcs.isEmpty() ? 0 : 1;

View File

@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
@ -325,6 +326,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
@Argument(doc="indel size select",required=false,fullName="maxIndelSize")
private int maxIndelSize = Integer.MAX_VALUE;
@Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false;
/* Private class used to store the intermediate variants in the integer random selection process */
private static class RandomVariantStructure {
@ -386,10 +390,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);
// first, add any requested samples
samples.addAll(samplesFromFile);
samples.addAll(samplesFromExpressions);
// first, check overlap between requested and present samples
Set<String> commandLineUniqueSamples = new HashSet<String>(samplesFromFile.size()+samplesFromExpressions.size()+sampleNames.size());
commandLineUniqueSamples.addAll(samplesFromFile);
commandLineUniqueSamples.addAll(samplesFromExpressions);
commandLineUniqueSamples.addAll(sampleNames);
commandLineUniqueSamples.removeAll(vcfSamples);
// second, add the requested samples
samples.addAll(sampleNames);
samples.addAll(samplesFromExpressions);
samples.addAll(samplesFromFile);
logger.debug(Utils.join(",",commandLineUniqueSamples));
if ( commandLineUniqueSamples.size() > 0 && ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES ) {
logger.warn("Samples present on command line input that are not present in the VCF. These samples will be ignored.");
samples.removeAll(commandLineUniqueSamples);
} else if (commandLineUniqueSamples.size() > 0 ) {
throw new UserException.BadInput(String.format("%s%n%n%s%n%n%s%n%n%s",
"Samples entered on command line (through -sf or -sn) that are not present in the VCF.",
"A list of these samples:",
Utils.join(",",commandLineUniqueSamples),
"To ignore these samples, run with --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES"));
}
// if none were requested, we want all of them
if ( samples.isEmpty() ) {

View File

@ -7,7 +7,9 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -15,6 +17,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -278,7 +281,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
private byte getFlippedEncoding(Genotype g, int offset) {
byte b;
if ( g.hasGQ() && g.getGQ() < minGenotypeQuality ) {
if ( ! checkGQIsGood(g) ) {
b = NO_CALL;
} else if ( g.isHomRef() ) {
b = HOM_VAR;
@ -293,6 +296,16 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
return (byte) (b << (2*offset));
}
/**
 * Decides whether a genotype's quality passes the minimum threshold.
 * Uses the explicit GQ when present; otherwise falls back to deriving a
 * GQ from the likelihoods vector. Genotypes with neither are rejected.
 */
private boolean checkGQIsGood(Genotype genotype) {
    if ( genotype.hasGQ() )
        return genotype.getGQ() >= minGenotypeQuality;
    if ( genotype.hasLikelihoods() )
        // NOTE(review): ordinal()-1 maps the GenotypeType to the PL index — TODO confirm against GenotypeType ordering
        return GenotypeLikelihoods.getGQLog10FromLikelihoods(genotype.getType().ordinal()-1,genotype.getLikelihoods().getAsVector()) >= minGenotypeQuality;
    return false;
}
private static String getID(VariantContext v) {
if ( v.hasID() ) {
return v.getID();

View File

@ -246,7 +246,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
}
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
vc = VariantContextUtils.addMissingSamples(vc, samples);
vcfwriter.add(vc);
}

View File

@ -88,8 +88,8 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
case UNBOUNDED: return -1;
case A: return vc.getNAlleles() - 1;
case G:
final int ploidy = vc.getMaxPloidy();
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy == 0 ? 2 : ploidy);
final int ploidy = vc.getMaxPloidy(2);
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy);
default:
throw new ReviewedStingException("Unknown count type: " + countType);
}

View File

@ -47,7 +47,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.*;
@ -333,8 +333,8 @@ public class RecalUtils {
return covariate.getClass().getSimpleName().split("Covariate")[0];
}
public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), RAC.RECAL_TABLE);
}
/**
@ -362,46 +362,36 @@ public class RecalUtils {
report.print(outputFile);
}
// Creates and opens the intermediate CSV file consumed by the recalibration plotting
// R script. Returns both the open stream (for writing rows) and the File handle
// (so the path can later be handed to the script and deleted).
private static Pair<PrintStream, File> initializeRecalibrationPlot(File filename) {
final PrintStream deltaTableStream;
// the CSV is placed next to the requested output file, with ".csv" appended
final File deltaTableFileName = new File(filename + ".csv");
try {
deltaTableStream = new PrintStream(deltaTableFileName);
} catch (FileNotFoundException e) {
// surface a user-facing error instead of a raw FileNotFoundException
throw new UserException.CouldNotCreateOutputFile(deltaTableFileName, "File " + deltaTableFileName + " could not be created");
}
return new Pair<PrintStream, File>(deltaTableStream, deltaTableFileName);
}
private static void outputRecalibrationPlot(final File gatkReportFilename, Pair<PrintStream, File> files, boolean keepIntermediates) {
final File csvFileName = files.getSecond();
final File plotFileName = new File(csvFileName + ".pdf");
files.getFirst().close();
private static void outputRecalibrationPlot(final RecalibrationArgumentCollection RAC) {
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
executor.addArgs(csvFileName.getAbsolutePath());
executor.addArgs(gatkReportFilename.getAbsolutePath());
executor.addArgs(plotFileName.getAbsolutePath());
executor.addArgs(RAC.RECAL_CSV_FILE.getAbsolutePath());
executor.addArgs(RAC.RECAL_TABLE_FILE.getAbsolutePath());
executor.addArgs(RAC.RECAL_PDF_FILE.getAbsolutePath());
executor.exec();
if (!keepIntermediates)
if (!csvFileName.delete())
throw new ReviewedStingException("Could not find file " + csvFileName.getAbsolutePath());
}
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, true);
outputRecalibrationPlot(filename, files, keepIntermediates);
// Convenience overload: plot only the "original" (pre-recalibration) tables.
// Delegates to the full overload with a null recalibrated table set.
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final Covariate[] requestedCovariates) {
generateRecalibrationPlot(RAC, original, null, requestedCovariates);
}
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", requestedCovariates, true);
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, false);
outputRecalibrationPlot(filename, files, keepIntermediates);
// Writes the before/after recalibration data as CSV (to RAC.RECAL_CSV_FILE, creating a
// self-deleting temp file when the user didn't request one) and then invokes the R
// plotting script via outputRecalibrationPlot. Passing recalibrated == null plots only
// the original tables.
public static void generateRecalibrationPlot(final RecalibrationArgumentCollection RAC, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates) {
final PrintStream csvFile;
try {
if ( RAC.RECAL_CSV_FILE == null ) {
// no CSV requested on the command line: use a throwaway temp file
RAC.RECAL_CSV_FILE = File.createTempFile("BQSR", ".csv");
RAC.RECAL_CSV_FILE.deleteOnExit();
}
csvFile = new PrintStream(RAC.RECAL_CSV_FILE);
} catch (IOException e) {
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_CSV_FILE, e);
}
if ( recalibrated != null )
writeCSV(csvFile, recalibrated, "RECALIBRATED", requestedCovariates, true);
// header is printed only by the first table written (recalibrated == null means this is first)
writeCSV(csvFile, original, "ORIGINAL", requestedCovariates, recalibrated == null);
// NOTE(review): csvFile is never closed/flushed here before the R script reads the CSV --
// confirm PrintStream buffering cannot leave the file truncated when Rscript runs.
outputRecalibrationPlot(RAC);
}
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {

View File

@ -284,14 +284,11 @@ public class RecalibrationReport {
else if (argument.equals("quantizing_levels"))
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
else if (argument.equals("keep_intermediate_files"))
RAC.KEEP_INTERMEDIATE_FILES = Boolean.parseBoolean((String) value);
else if (argument.equals("no_plots"))
RAC.NO_PLOTS = Boolean.parseBoolean((String) value);
else if (argument.equals("recalibration_report"))
RAC.recalibrationReport = (value == null) ? null : new File((String) value);
RAC.existingRecalibrationReport = (value == null) ? null : new File((String) value);
else if (argument.equals("plot_pdf_file"))
RAC.RECAL_PDF_FILE = (value == null) ? null : new File((String) value);
else if (argument.equals("binary_tag_name"))
RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;

View File

@ -53,6 +53,9 @@ import java.util.*;
*/
@Invariant({"alleles != null"})
public final class GenotypeBuilder {
private static final List<Allele> HAPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL);
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
private String sampleName = null;
private List<Allele> alleles = Collections.emptyList();
@ -90,6 +93,23 @@ public final class GenotypeBuilder {
return new GenotypeBuilder(sampleName, alleles).PL(gls).make();
}
/**
* Create a new no-call Genotype for a sample that's missing from the VC (i.e., present in
* the output header but absent from the variant record).
*
* @param sampleName the name of this sample
* @param ploidy the number of NO_CALL alleles to give the genotype (e.g., 2 produces ./.)
* @return an initialized Genotype with sampleName consisting of ploidy no-call alleles
*/
public static Genotype createMissing(final String sampleName, final int ploidy) {
final GenotypeBuilder builder = new GenotypeBuilder(sampleName);
// reuse the cached allele lists for the common haploid/diploid cases
switch ( ploidy ) {
case 1: builder.alleles(HAPLOID_NO_CALL); break;
case 2: builder.alleles(DIPLOID_NO_CALL); break;
default: builder.alleles(Collections.nCopies(ploidy, Allele.NO_CALL)); break;
}
return builder.make();
}
/**
* Create a empty builder. Both a sampleName and alleles must be provided
* before trying to make a Genotype from this builder.

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;
public class GenotypeLikelihoods {
private final static int NUM_LIKELIHOODS_CACHE_N_ALLELES = 5;
@ -167,10 +168,36 @@ public class GenotypeLikelihoods {
//Return the neg log10 Genotype Quality (GQ) for the given genotype
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
/**
* This is really dangerous and returns completely wrong results for genotypes from a multi-allelic context.
* Use getLog10GQ(Genotype,VariantContext) or getLog10GQ(Genotype,List<Allele>) in place of it.
*
* If you **know** you're biallelic, use getGQLog10FromLikelihoods directly.
* @param genotype - actually a genotype type (no call, hom ref, het, hom var)
* @return an unsafe quantity that could be negative. In the bi-allelic case, the GQ resulting from best minus next best (if the type is the best).
*/
@Deprecated
public double getLog10GQ(GenotypeType genotype){
// GenotypeType.ordinal() - 1 maps the type onto a PL index (NO_CALL is ordinal 0);
// only meaningful for biallelic diploid likelihood vectors -- see the deprecation javadoc above
return getGQLog10FromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
}
// Computes GQ for an explicit diploid genotype (exactly two alleles, per @Requires) against
// the alleles of the enclosing context, so multi-allelic sites index the PL vector correctly.
@Requires({"genotypeAlleles != null","genotypeAlleles.size()==2","contextAlleles != null","contextAlleles.size() >= 1"})
private double getLog10GQ(List<Allele> genotypeAlleles,List<Allele> contextAlleles) {
int allele1Index = contextAlleles.indexOf(genotypeAlleles.get(0));
int allele2Index = contextAlleles.indexOf(genotypeAlleles.get(1));
// NOTE(review): indexOf returns -1 for an allele not present in contextAlleles, which would
// feed a negative index to calculatePLindex -- confirm callers guarantee containment.
int plIndex = calculatePLindex(allele1Index,allele2Index);
return getGQLog10FromLikelihoods(plIndex,getAsVector());
}
// GQ of a sample's genotype given the list of alleles segregating at the site.
public double getLog10GQ(Genotype genotype, List<Allele> vcAlleles ) {
return getLog10GQ(genotype.getAlleles(),vcAlleles);
}
// GQ of a sample's genotype in the context of a VariantContext (uses the VC's allele list).
public double getLog10GQ(Genotype genotype, VariantContext context) {
return getLog10GQ(genotype,context.getAlleles());
}
public static double getGQLog10FromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
if(likelihoods == null)
return Double.NEGATIVE_INFINITY;

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils.variantcontext;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import java.util.*;
@ -413,14 +412,26 @@ public class GenotypesContext implements List<Genotype> {
return getGenotypes().get(i);
}
/**
* What is the max ploidy among all samples? Returns defaultPloidy if no genotypes are present
*
* @param defaultPloidy the default ploidy, if all samples are no-called
* @return
*/
@Ensures("result >= 0")
public int getMaxPloidy() {
public int getMaxPloidy(final int defaultPloidy) {
// fail fast on a nonsensical default
if ( defaultPloidy < 0 ) throw new IllegalArgumentException("defaultPloidy must be greater than or equal to 0");
// maxPloidy == -1 is the "not yet computed" sentinel; the result is cached
if ( maxPloidy == -1 ) {
maxPloidy = 0; // necessary in the case where there are no genotypes
for ( final Genotype g : getGenotypes() ) {
maxPloidy = Math.max(g.getPloidy(), maxPloidy);
}
// everything is no called so we return the default ploidy
// NOTE(review): the defaultPloidy from the FIRST call is cached, so later calls with a
// different default still return the first value -- confirm that's acceptable to callers.
if ( maxPloidy == 0 ) maxPloidy = defaultPloidy;
}
return maxPloidy;
}

View File

@ -642,14 +642,15 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
/**
* Returns the maximum ploidy of all samples in this VC, or -1 if there are no genotypes
* Returns the maximum ploidy of all samples in this VC, or default if there are no genotypes
*
* This function is caching, so it's only expensive on the first call
*
* @return -1, or the max ploidy
* @param defaultPloidy the default ploidy, if all samples are no-called
* @return default, or the max ploidy
*/
public int getMaxPloidy() {
return genotypes.getMaxPloidy();
// Delegates to GenotypesContext; per the javadoc above, the result is cached after the first call.
public int getMaxPloidy(final int defaultPloidy) {
return genotypes.getMaxPloidy(defaultPloidy);
}
/**

View File

@ -32,8 +32,8 @@ import org.apache.log4j.Logger;
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -47,7 +47,6 @@ public class VariantContextUtils {
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
public final static String MERGE_FILTER_PREFIX = "filterIn";
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
private static Set<String> MISSING_KEYS_WARNED_ABOUT = new HashSet<String>();
final public static JexlEngine engine = new JexlEngine();
@ -60,31 +59,6 @@ public class VariantContextUtils {
engine.setDebug(false);
}
/**
* Ensures that VC contains all of the samples in allSamples by adding missing samples to
* the resulting VC with default diploid ./. genotypes
*
* @param vc the VariantContext
* @param allSamples all of the samples needed
* @return a new VariantContext with missing samples added
*/
public static VariantContext addMissingSamples(final VariantContext vc, final Set<String> allSamples) {
// TODO -- what's the fastest way to do this calculation?
// missing = allSamples \ samples already in the VC
final Set<String> missingSamples = new HashSet<String>(allSamples);
missingSamples.removeAll(vc.getSampleNames());
if ( missingSamples.isEmpty() )
// nothing to add: return the input untouched (no copy)
return vc;
else {
//logger.warn("Adding " + missingSamples.size() + " missing samples to called context");
final GenotypesContext gc = GenotypesContext.copy(vc.getGenotypes());
for ( final String missing : missingSamples ) {
// every added sample gets a diploid ./. no-call regardless of the site's actual ploidy
gc.add(new GenotypeBuilder(missing).alleles(DIPLOID_NO_CALL).make());
}
return new VariantContextBuilder(vc).genotypes(gc).make();
}
}
/**
* Update the attributes of the attributes map given the VariantContext to reflect the
* proper chromosome-based VCF tags

View File

@ -272,11 +272,7 @@ public abstract class BCF2FieldWriter {
encodingType = BCF2Type.INT8;
buildAlleleMap(vc);
nValuesPerGenotype = vc.getMaxPloidy();
// deal with the case where we have no call everywhere, in which case we write out diploid
if ( nValuesPerGenotype == -1 )
nValuesPerGenotype = 2;
nValuesPerGenotype = vc.getMaxPloidy(2);
super.start(encoder, vc);
}

View File

@ -32,7 +32,10 @@ import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFContigHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -345,10 +348,12 @@ class BCF2Writer extends IndexingVariantContextWriter {
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT");
assert writer != null;
writer.start(encoder, vc);
for ( final String name : sampleNames ) {
Genotype g = vc.getGenotype(name);
if ( g == null ) VCFWriter.missingSampleError(vc, header);
if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype);
writer.addGenotype(encoder, vc, g);
}
writer.done(encoder, vc);

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -339,13 +338,13 @@ class VCFWriter extends IndexingVariantContextWriter {
*/
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
throws IOException {
final int ploidy = vc.getMaxPloidy(2);
for ( String sample : mHeader.getGenotypeSamples() ) {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
Genotype g = vc.getGenotype(sample);
if ( g == null ) {
missingSampleError(vc, mHeader);
}
if ( g == null ) g = GenotypeBuilder.createMissing(sample, ploidy);
final List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
for ( String field : genotypeFormatKeys ) {
@ -426,13 +425,6 @@ class VCFWriter extends IndexingVariantContextWriter {
}
}
// Fails loudly when the VCF header contains samples that have no Genotype object in the VC,
// listing every missing sample name to aid debugging. Always throws.
public static final void missingSampleError(final VariantContext vc, final VCFHeader header) {
final List<String> badSampleNames = new ArrayList<String>();
for ( final String x : header.getGenotypeSamples() )
if ( ! vc.hasGenotype(x) ) badSampleNames.add(x);
throw new ReviewedStingException("BUG: we now require all samples in VCFheader to have genotype objects. Missing samples are " + Utils.join(",", badSampleNames));
}
private boolean isMissingValue(String s) {
// we need to deal with the case that it's a list of missing values
return (countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + countOccurrences(',', s) == s.length());

View File

@ -70,6 +70,20 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testComplexSelection--" + testfile, spec);
}
@Test
// With --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES, requesting samples that are not in the
// VCF (presumably Z and T -- verify against the test file) must not fail; the md5 pins the
// expected selected output.
public void testComplexSelectionWithNonExistingSamples() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES -sn A -se '[CDH]' -sn Z -sn T -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
1,
Arrays.asList("4386fbb258dcef4437495a37f5a83c53")
);
spec.disableShadowBCF();
executeTest("testComplexSelectionWithNonExistingSamples--" + testfile, spec);
}
@Test
public void testNonExistingFieldSelection() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
@ -98,6 +112,21 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testSampleExclusion--" + testfile, spec);
}
@Test
// Without the allow-nonoverlapping flag, requesting samples absent from the VCF must raise
// UserException.BadInput (third WalkerTestSpec argument is the expected exception class).
public void testSampleInclusionWithNonexistingSamples() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -sn A -sn Z -sn Q -sf " + samplesFile + " --variant " + testfile,
1,
UserException.BadInput.class
);
spec.disableShadowBCF();
executeTest("testSampleInclusionWithNonexistingSamples--" + testfile, spec);
}
@Test
public void testConcordance() {

View File

@ -29,12 +29,15 @@ package org.broadinstitute.sting.utils.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;
/**
@ -44,6 +47,7 @@ public class GenotypeLikelihoodsUnitTest {
double [] v = new double[]{-10.5, -1.25, -5.11};
final static String vGLString = "-10.50,-1.25,-5.11";
final static String vPLString = "93,0,39";
double[] triAllelic = new double[]{-4.2,-2.0,-3.0,-1.6,0.0,-4.0}; //AA,AB,AC,BB,BC,CC
@Test
public void testFromVector2() {
@ -139,6 +143,28 @@ public class GenotypeLikelihoodsUnitTest {
}
}
// this test is completely broken, the method is wrong.
// Documents the known-wrong answer of the deprecated getLog10GQ(GenotypeType) on a
// tri-allelic likelihood vector. NOTE(review): no @Test annotation is visible here, so this
// appears intentionally not executed -- confirm.
public void testGetQualFromLikelihoodsMultiAllelicBroken() {
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic);
double actualGQ = gl.getLog10GQ(GenotypeType.HET);
double expectedGQ = 1.6;
Assert.assertEquals(actualGQ,expectedGQ);
}
// Allele-aware GQ on the tri-allelic vector: the C/T het maps to the BC likelihood (0.0, the
// best), and the next best is BB at -1.6, so GQ == 1.6.
// NOTE(review): no @Test annotation is visible here -- confirm whether this should run.
public void testGetQualFromLikelihoodsMultiAllelic() {
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic);
Allele ref = Allele.create(BaseUtils.A,true);
Allele alt1 = Allele.create(BaseUtils.C);
Allele alt2 = Allele.create(BaseUtils.T);
List<Allele> allAlleles = Arrays.asList(ref,alt1,alt2);
List<Allele> gtAlleles = Arrays.asList(alt1,alt2);
GenotypeBuilder gtBuilder = new GenotypeBuilder();
gtBuilder.alleles(gtAlleles);
double actualGQ = gl.getLog10GQ(gtBuilder.make(),allAlleles);
double expectedGQ = 1.6;
Assert.assertEquals(actualGQ,expectedGQ);
}
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
Assert.assertEquals(v1.length, v2.length);
for ( int i = 0; i < v1.length; i++ ) {

View File

@ -596,6 +596,51 @@ public class VariantContextTestProvider {
return TEST_DATAs;
}
// Round-trips the test VCs through a writer whose header declares two extra samples
// (MISSING1, MISSING2) that no VC contains, then verifies that on read-back the original
// samples' genotypes are unchanged and the extra samples come back as no-calls.
public static void testReaderWriterWithMissingGenotypes(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException {
final int nSamples = data.header.getNGenotypeSamples();
// only meaningful with several real samples; skip small fixtures
if ( nSamples > 2 ) {
for ( final VariantContext vc : data.vcs )
if ( vc.isSymbolic() )
// cannot handle symbolic alleles because they may be weird non-call VCFs
return;
final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension());
tmpFile.deleteOnExit();
// write expected to disk
final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
final VariantContextWriter writer = tester.makeWriter(tmpFile, options);
// header = two fabricated missing samples + every sample actually present in the data
final Set<String> samplesInVCF = new HashSet<String>(data.header.getGenotypeSamples());
final List<String> missingSamples = Arrays.asList("MISSING1", "MISSING2");
final List<String> allSamples = new ArrayList<String>(missingSamples);
allSamples.addAll(samplesInVCF);
final VCFHeader header = new VCFHeader(data.header.getMetaDataInInputOrder(), allSamples);
writeVCsToFile(writer, header, data.vcs);
// ensure writing of expected == actual
final Pair<VCFHeader, Iterable<VariantContext>> p = readAllVCs(tmpFile, tester.makeCodec());
final Iterable<VariantContext> actual = p.getSecond();
int i = 0;
for ( final VariantContext readVC : actual ) {
if ( readVC == null ) continue; // sometimes we read null records...
final VariantContext expected = data.vcs.get(i++);
for ( final Genotype g : readVC.getGenotypes() ) {
Assert.assertTrue(allSamples.contains(g.getSampleName()));
if ( samplesInVCF.contains(g.getSampleName()) ) {
// real sample: genotype must survive the round trip unchanged
assertEquals(g, expected.getGenotype(g.getSampleName()));
} else {
// missing
Assert.assertTrue(g.isNoCall());
}
}
}
}
}
// Standard round-trip: write data.vcs with its own header and expect to read back the same VCs.
public static void testReaderWriter(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException {
testReaderWriter(tester, data.header, data.vcs, data.vcs, true);
}

View File

@ -82,6 +82,11 @@ public class VariantContextWritersUnitTest extends BaseTest {
VariantContextTestProvider.testReaderWriter(new BCFIOTester(), testData);
}
@Test(dataProvider = "VariantContextTest_SingleContexts")
// Exercises the missing-genotypes round trip through the BCF2 writer/reader.
public void testBCF2WriterReaderMissingGenotypes(final VariantContextTestProvider.VariantContextTestData testData) throws IOException {
VariantContextTestProvider.testReaderWriterWithMissingGenotypes(new BCFIOTester(), testData);
}
private class BCFIOTester extends VariantContextTestProvider.VariantContextIOTest {
@Override
public String getExtension() {
@ -110,6 +115,11 @@ public class VariantContextWritersUnitTest extends BaseTest {
VariantContextTestProvider.testReaderWriter(new VCFIOTester(), testData);
}
@Test(enabled = true, dataProvider = "VariantContextTest_SingleContexts")
// Exercises the missing-genotypes round trip through the VCF4 writer/reader.
public void testVCF4WriterReaderMissingGenotypes(final VariantContextTestProvider.VariantContextTestData testData) throws IOException {
VariantContextTestProvider.testReaderWriterWithMissingGenotypes(new VCFIOTester(), testData);
}
private class VCFIOTester extends VariantContextTestProvider.VariantContextIOTest {
@Override
public String getExtension() {