Updating VQSR to work with the MNP and symbolic variants that are coming out of the HaplotypeCaller. Integration tests change because of the MNPs in dbSNP.

This commit is contained in:
Ryan Poplin 2012-08-03 10:44:36 -04:00
parent 6f7a236cfc
commit f40d0a0a28
4 changed files with 34 additions and 13 deletions

View File

@ -196,7 +196,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
for( final VariantContext vc : VCs ) {
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
if( VariantDataManager.checkVariationClass( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
final VariantContext recalDatum = getMatchingRecalVC(vc, recals);
if( recalDatum == null ) {

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -273,11 +274,37 @@ public class VariantDataManager {
}
/**
 * Decides whether a training-set record may be paired with the variant being evaluated.
 * <p>
 * NOTE(review): this span is a flattened diff hunk, so two {@code return} statements appear back to back.
 * The first (pre-change) compares the variation classes inline through isSNP/isIndel/isMixed; the second
 * (post-change) hands that comparison to {@code checkVariationClass( evalVC, trainVC )}. Confirm which one
 * is live before relying on this text.
 *
 * @param evalVC the variant under evaluation
 * @param trainVC the candidate training record; a null value yields false
 * @param TRUST_ALL_POLYMORPHIC when true, the genotype-based polymorphism clause is bypassed
 * @return true only if trainVC is non-null, unfiltered, variant, class-compatible with evalVC, and either
 *         TRUST_ALL_POLYMORPHIC is set, trainVC carries no genotypes, or trainVC is polymorphic in its samples
 */
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) &&
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples());
}
/**
 * Checks whether an evaluation variant falls in the same recalibration class as a training variant.
 * The training record's type is first mapped onto a recalibration mode (SNP and MNP map to SNP mode;
 * INDEL, MIXED and SYMBOLIC map to INDEL mode), then the evaluation variant is tested against that mode.
 *
 * @param evalVC  the variant being evaluated
 * @param trainVC the training variant whose type selects the mode
 * @return true if evalVC belongs to the mode implied by trainVC's type; false when that type maps to no mode
 */
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) {
    final VariantRecalibratorArgumentCollection.Mode trainingMode;
    switch( trainVC.getType() ) {
        case SNP:
        case MNP:
            trainingMode = VariantRecalibratorArgumentCollection.Mode.SNP;
            break;
        case INDEL:
        case MIXED:
        case SYMBOLIC:
            trainingMode = VariantRecalibratorArgumentCollection.Mode.INDEL;
            break;
        default:
            // any other training type has no recalibration class to compare against
            return false;
    }
    return checkVariationClass( evalVC, trainingMode );
}
/**
 * Checks whether a variant belongs to the class of variation handled by the given recalibration mode.
 *
 * @param evalVC the variant being evaluated
 * @param mode   the recalibration mode to test against
 * @return for SNP mode, true when the variant is a SNP or MNP; for INDEL mode, true when it is an indel,
 *         mixed or symbolic record; for BOTH mode, always true
 * @throws ReviewedStingException if the mode is not one of SNP, INDEL or BOTH
 */
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantRecalibratorArgumentCollection.Mode mode ) {
    final boolean inClass;
    switch( mode ) {
        case BOTH:
            // every variant qualifies; the record itself is not inspected
            inClass = true;
            break;
        case SNP:
            inClass = evalVC.isSNP() || evalVC.isMNP();
            break;
        case INDEL:
            inClass = evalVC.isIndel() || evalVC.isMixed() || evalVC.isSymbolic();
            break;
        default:
            throw new ReviewedStingException( "Encountered unknown recal mode: " + mode );
    }
    return inClass;
}
public void writeOutRecalibrationTable( final VariantContextWriter recalWriter ) {
// we need to sort in coordinate order in order to produce a valid VCF
Collections.sort( data, new Comparator<VariantDatum>() {

View File

@ -245,7 +245,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
for( final VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
if( vc != null && ( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) ) {
if( checkRecalibrationMode( vc, VRAC.MODE ) ) {
if( VariantDataManager.checkVariationClass( vc, VRAC.MODE ) ) {
final VariantDatum datum = new VariantDatum();
// Populate the datum with lots of fields from the VariantContext, unfortunately the VC is too big so we just pull in only the things we absolutely need.
@ -268,12 +268,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
return mapList;
}
/**
 * Tests whether a variant should be processed under the requested recalibration mode.
 * <p>
 * The accepted classes match those used by {@code VariantDataManager.checkVariationClass}: MNPs are handled
 * in SNP mode and symbolic records in INDEL mode. Previously both were silently rejected here, which dropped
 * such records from recalibration.
 * <p>
 * Unlike {@code VariantDataManager.checkVariationClass}, this method never throws on an unexpected mode;
 * a null or unrecognised mode simply yields false, as it always has.
 *
 * @param vc   the variant to classify
 * @param mode the recalibration mode in effect
 * @return true if mode is BOTH, or if vc belongs to the class of variation covered by mode; false otherwise
 */
public static boolean checkRecalibrationMode( final VariantContext vc, final VariantRecalibratorArgumentCollection.Mode mode ) {
    if( mode == VariantRecalibratorArgumentCollection.Mode.BOTH ) {
        return true;
    }
    if( mode == VariantRecalibratorArgumentCollection.Mode.SNP ) {
        // MNPs are recalibrated together with SNPs
        return vc.isSNP() || vc.isMNP();
    }
    if( mode == VariantRecalibratorArgumentCollection.Mode.INDEL ) {
        // mixed and symbolic records are recalibrated together with indels
        return vc.isIndel() || vc.isMixed() || vc.isSymbolic();
    }
    return false;
}
//---------------------------------------------------------------------------------------------------------------
//
// reduce

View File

@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
}
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
"62f81e7d2082fbc71cae0101c27fefad", // tranches
"b9709e4180e56abc691b208bd3e8626c", // recal file
"75c178345f70ca2eb90205662fbdf968"); // cut VCF
"f360ce3eb2b0b887301be917a9843e2b", // tranches
"287fea5ea066bf3fdd71f5ce9b58eab3", // recal file
"356b9570817b9389da71fbe991d8b2f5"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {