Minor tweaks and updated integration tests MD5s
This commit is contained in:
parent
6d03bce0d3
commit
c1f52b773a
|
|
@ -109,7 +109,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
// Command Line Arguments
|
||||
/////////////////////////////
|
||||
@Argument(fullName="ts_filter_level", shortName="ts_filter_level", doc="The truth sensitivity level at which to start filtering", required=false)
|
||||
private double TS_FILTER_LEVEL = 99.0;
|
||||
protected double TS_FILTER_LEVEL = 99.0;
|
||||
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
|
||||
private String[] IGNORE_INPUT_FILTERS = null;
|
||||
@Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously.", required = false)
|
||||
|
|
@ -183,58 +183,69 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
return 1;
|
||||
}
|
||||
|
||||
for( final VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
|
||||
if( vc != null ) {
|
||||
final List<VariantContext> VCs = tracker.getValues(input, context.getLocation());
|
||||
final List<VariantContext> recals = tracker.getValues(recal, context.getLocation());
|
||||
|
||||
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
for( final VariantContext vc : VCs ) {
|
||||
|
||||
final VariantContext recalDatum = tracker.getFirstValue(recal, context.getLocation());
|
||||
if( recalDatum == null ) {
|
||||
throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc );
|
||||
}
|
||||
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
|
||||
final double lod = recalDatum.getAttributeAsDouble(VariantRecalibrator.VQS_LOD_KEY, Double.NEGATIVE_INFINITY);
|
||||
if( lod == Double.NEGATIVE_INFINITY ) {
|
||||
throw new UserException("Encountered a malformed record in the input recal file. There is no lod for the record at: " + vc );
|
||||
}
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
String filterString = null;
|
||||
|
||||
// Annotate the new record with its VQSLOD and the worst performing annotation
|
||||
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod);
|
||||
builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY));
|
||||
|
||||
for( int i = tranches.size() - 1; i >= 0; i-- ) {
|
||||
final Tranche tranche = tranches.get(i);
|
||||
if( lod >= tranche.minVQSLod ) {
|
||||
if( i == tranches.size() - 1 ) {
|
||||
filterString = VCFConstants.PASSES_FILTERS_v4;
|
||||
} else {
|
||||
filterString = tranche.name;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( filterString == null ) {
|
||||
filterString = tranches.get(0).name+"+";
|
||||
}
|
||||
|
||||
if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
|
||||
builder.filters(filterString);
|
||||
}
|
||||
|
||||
vcfWriter.add( builder.make() );
|
||||
} else { // valid VC but not compatible with this mode, so just emit the variant untouched
|
||||
vcfWriter.add( vc );
|
||||
final VariantContext recalDatum = getMatchingRecalVC(vc, recals);
|
||||
if( recalDatum == null ) {
|
||||
throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc );
|
||||
}
|
||||
|
||||
final double lod = recalDatum.getAttributeAsDouble(VariantRecalibrator.VQS_LOD_KEY, Double.NEGATIVE_INFINITY);
|
||||
if( lod == Double.NEGATIVE_INFINITY ) {
|
||||
throw new UserException("Encountered a malformed record in the input recal file. There is no lod for the record at: " + vc );
|
||||
}
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
String filterString = null;
|
||||
|
||||
// Annotate the new record with its VQSLOD and the worst performing annotation
|
||||
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod);
|
||||
builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY));
|
||||
|
||||
for( int i = tranches.size() - 1; i >= 0; i-- ) {
|
||||
final Tranche tranche = tranches.get(i);
|
||||
if( lod >= tranche.minVQSLod ) {
|
||||
if( i == tranches.size() - 1 ) {
|
||||
filterString = VCFConstants.PASSES_FILTERS_v4;
|
||||
} else {
|
||||
filterString = tranche.name;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( filterString == null ) {
|
||||
filterString = tranches.get(0).name+"+";
|
||||
}
|
||||
|
||||
if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
|
||||
builder.filters(filterString);
|
||||
}
|
||||
|
||||
vcfWriter.add( builder.make() );
|
||||
} else { // valid VC but not compatible with this mode, so just emit the variant untouched
|
||||
vcfWriter.add( vc );
|
||||
}
|
||||
}
|
||||
|
||||
return 1; // This value isn't used for anything
|
||||
}
|
||||
|
||||
/**
 * Finds the recalibration record that corresponds to a given input variant.
 *
 * Scans {@code recalVCs} in list order and returns the first record whose
 * {@code getEnd()} equals {@code target.getEnd()}. Only the end coordinate is
 * compared; start position and alleles are not examined here.
 * NOTE(review): this presumably relies on the recal records at a locus being
 * distinguishable by end position alone -- confirm against how the recal file is written.
 *
 * @param target   the input variant for which a recal record is wanted
 * @param recalVCs candidate recal records to search
 * @return the first record in {@code recalVCs} with a matching end coordinate,
 *         or {@code null} if none matches
 */
private static VariantContext getMatchingRecalVC(final VariantContext target, final List<VariantContext> recalVCs) {
|
||||
for( final VariantContext recalVC : recalVCs ) {
|
||||
if ( target.getEnd() == recalVC.getEnd() ) {
|
||||
return recalVC;
|
||||
}
|
||||
}
|
||||

||||
// no candidate shared the target's end coordinate
return null;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// reduce
|
||||
|
|
|
|||
|
|
@ -299,9 +299,6 @@ public class VariantDataManager {
|
|||
alleles.add(Allele.create("N", true));
|
||||
alleles.add(Allele.create("<VQSR>", false));
|
||||
|
||||
final VCFHeader vcfHeader = new VCFHeader( null, Collections.<String>emptySet() );
|
||||
recalWriter.writeHeader(vcfHeader);
|
||||
|
||||
// to be used for the important INFO tags
|
||||
final HashMap<String, Object> attributes = new HashMap<String, Object>(3);
|
||||
|
||||
|
|
@ -310,7 +307,7 @@ public class VariantDataManager {
|
|||
attributes.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", datum.lod));
|
||||
attributes.put(VariantRecalibrator.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getStop(), alleles).attributes(attributes);
|
||||
VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getStart(), alleles).attributes(attributes);
|
||||
recalWriter.add(builder.make());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,8 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.io.Resource;
|
||||
|
|
@ -137,9 +138,11 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
// Outputs
|
||||
/////////////////////////////
|
||||
@Output(fullName="recal_file", shortName="recalFile", doc="The output recal file used by ApplyRecalibration", required=true)
|
||||
private VCFWriter recalWriter;
|
||||
protected File recalFile = null;
|
||||
protected StandardVCFWriter recalWriter = null;
|
||||
|
||||
@Output(fullName="tranches_file", shortName="tranchesFile", doc="The output tranches file used by ApplyRecalibration", required=true)
|
||||
private File TRANCHES_FILE;
|
||||
protected File TRANCHES_FILE;
|
||||
|
||||
/////////////////////////////
|
||||
// Additional Command Line Arguments
|
||||
|
|
@ -151,7 +154,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
* that this parameter is used for display purposes only and isn't used anywhere in the algorithm!
|
||||
*/
|
||||
@Argument(fullName="target_titv", shortName="titv", doc="The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on the optimization curve output figures. (approx 2.15 for whole genome experiments). ONLY USED FOR PLOTTING PURPOSES!", required=false)
|
||||
private double TARGET_TITV = 2.15;
|
||||
protected double TARGET_TITV = 2.15;
|
||||
|
||||
/**
|
||||
* See the input VCF file's INFO field for a list of all available annotations.
|
||||
|
|
@ -171,7 +174,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
@Output(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
|
||||
private File RSCRIPT_FILE = null;
|
||||
@Argument(fullName="ts_filter_level", shortName="ts_filter_level", doc="The truth sensitivity level at which to start filtering, used here to indicate filtered variants in the model reporting plots", required=false)
|
||||
private double TS_FILTER_LEVEL = 99.0;
|
||||
protected double TS_FILTER_LEVEL = 99.0;
|
||||
|
||||
/////////////////////////////
|
||||
// Debug Arguments
|
||||
|
|
@ -225,6 +228,10 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
if( !dataManager.checkHasTruthSet() ) {
|
||||
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||
}
|
||||
|
||||
final VCFHeader vcfHeader = new VCFHeader( null, Collections.<String>emptySet() );
|
||||
recalWriter = new StandardVCFWriter(recalFile, getMasterSequenceDictionary(), false);
|
||||
recalWriter.writeHeader(vcfHeader);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
|
||||
"58780f63182e139fdbe17f6c18b5b774", // recal file
|
||||
"f8e21a1987960b950db1f0d98be45352", // recal file
|
||||
"f67d844b6252a55452cf4167b77530b1"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
|
|
@ -74,7 +74,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"6d7ee4cb651c8b666e4a4523363caaff", // tranches
|
||||
"4759b111a5aa53975d46e0f22c7983bf", // recal file
|
||||
"ee5b408c8434a594496118875690c438", // recal file
|
||||
"5d7e07d8813db96ba3f3dfe4737f83d1"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRIndelTest")
|
||||
|
|
|
|||
Loading…
Reference in New Issue