Better error message for the case of input variants found in ApplyRecalibration that were never seen during VariantRecalibrator.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5979 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2011-06-13 14:45:28 +00:00
parent 6231bba288
commit 3534f412c9
4 changed files with 31 additions and 28 deletions

View File

@ -170,35 +170,36 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
for( VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), true, false) ) {
if( vc != null ) {
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) ) {
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
String filterString = null;
final Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
final Double lod = (Double) lodMap.get( ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop() );
if( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) {
attrs.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod));
for( int i = tranches.size() - 1; i >= 0; i-- ) {
final Tranche tranche = tranches.get(i);
if( lod >= tranche.minVQSLod ) {
if( i == tranches.size() - 1 ) {
filterString = VCFConstants.PASSES_FILTERS_v4;
} else {
filterString = tranche.name;
}
break;
final Double lod = (Double) lodMap.get( vc.getChr(), vc.getStart(), vc.getEnd() );
if( lod == null ) {
throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc );
}
attrs.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod));
for( int i = tranches.size() - 1; i >= 0; i-- ) {
final Tranche tranche = tranches.get(i);
if( lod >= tranche.minVQSLod ) {
if( i == tranches.size() - 1 ) {
filterString = VCFConstants.PASSES_FILTERS_v4;
} else {
filterString = tranche.name;
}
}
if( filterString == null ) {
filterString = tranches.get(0).name+"+";
}
if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
final Set<String> filters = new HashSet<String>();
filters.add(filterString);
vc = VariantContext.modifyFilters(vc, filters);
break;
}
}
if( filterString == null ) {
filterString = tranches.get(0).name+"+";
}
if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
final Set<String> filters = new HashSet<String>();
filters.add(filterString);
vc = VariantContext.modifyFilters(vc, filters);
}
vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs), ref.getBase() );
} else { // valid VC but not compatible with this mode, so just emit the variant untouched
vcfWriter.add( vc, ref.getBase() );

View File

@ -253,7 +253,7 @@ public class VariantDataManager {
public void writeOutRecalibrationTable( final PrintStream RECAL_FILE ) {
for( final VariantDatum datum : data ) {
RECAL_FILE.println(String.format("%s,%d,%d,%.4f", datum.pos.getContig(), datum.pos.getStart(), datum.pos.getStop(), datum.lod));
RECAL_FILE.println(String.format("%s,%d,%d,%.4f", datum.contig, datum.start, datum.stop, datum.lod));
}
}
}

View File

@ -1,7 +1,5 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
@ -22,8 +20,10 @@ public class VariantDatum implements Comparable<VariantDatum> {
public double originalQual;
public double prior;
public int consensusCount;
public GenomeLoc pos;
public int usedForTraining;
public String contig;
public int start;
public int stop;
public MultivariateGaussian assignment; // used in K-means implementation
public int compareTo( final VariantDatum other ) {

View File

@ -168,7 +168,9 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
if( checkRecalibrationMode( vc, VRAC.MODE ) ) {
final VariantDatum datum = new VariantDatum();
dataManager.decodeAnnotations( datum, vc, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine
datum.pos = context.getLocation();
datum.contig = vc.getChr();
datum.start = vc.getStart();
datum.stop = vc.getEnd();
datum.originalQual = vc.getPhredScaledQual();
datum.isSNP = vc.isSNP() && vc.isBiallelic();
datum.isTransition = datum.isSNP && VariantContextUtils.isTransition(vc);