UG: small bug fix when creating empty variant contexts in UG for the -EMIT_ALL_SITES to allow indels.

GAV: First version of the walker that validates reads from a BAM file based on an annotated VCF with TP/FP annotations. 


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5396 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
carneiro 2011-03-07 22:51:04 +00:00
parent 9384b2ff65
commit 02006954bc
2 changed files with 28 additions and 27 deletions

View File

@ -210,7 +210,7 @@ public class UnifiedGenotyperEngine {
private VariantContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, AlignmentContext rawContext) { private VariantContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
VariantContext vc; VariantContext vc;
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext vcInput = tracker.getVariantContext(ref, "alleles", null, ref.getLocus(), true); final VariantContext vcInput = tracker.getVariantContext(ref, "alleles", null, ref.getLocus(), false);
if ( vcInput == null ) if ( vcInput == null )
return null; return null;
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()); vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles());

View File

@ -51,9 +51,14 @@ import java.util.*;
* @help.summary Validates the calls on a ROD track using a BAM dataset. * @help.summary Validates the calls on a ROD track using a BAM dataset.
*/ */
//@Requires(value={DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_ORDERED_DATA}) @Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="alleles",type=VariantContext.class))
@Allows(value={DataSource.READS, DataSource.REFERENCE}) @Allows(value={DataSource.READS, DataSource.REFERENCE})
// Ugly fix because RodWalkers don't have access to reads
@By(DataSource.REFERENCE)
@Reference(window=@Window(start=-200,stop=200))
public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalker.CountedData, GenotypeAndValidateWalker.CountedData> { public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalker.CountedData, GenotypeAndValidateWalker.CountedData> {
@Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false) @Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false)
@ -70,17 +75,12 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
private UnifiedGenotyperEngine snpEngine; private UnifiedGenotyperEngine snpEngine;
private UnifiedGenotyperEngine indelEngine; private UnifiedGenotyperEngine indelEngine;
public static enum VARIANT_TYPE {
TRUE_POSITIVE,
FALSE_POSITIVE,
}
public static class CountedData { public static class CountedData {
private long numTP = 0L; private long numTP = 0L;
private long numTN = 0L; private long numTN = 0L;
private long numFP = 0L; private long numFP = 0L;
private long numFN = 0L; private long numFN = 0L;
private long numUncovered = 0L;
private long numConfidentCalls = 0L; private long numConfidentCalls = 0L;
private long numNotConfidentCalls = 0L; private long numNotConfidentCalls = 0L;
@ -93,6 +93,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
numTN += other.numTN; numTN += other.numTN;
numFP += other.numFP; numFP += other.numFP;
numFN += other.numFN; numFN += other.numFN;
numUncovered += other.numUncovered;
numNotConfidentCalls += other.numNotConfidentCalls; numNotConfidentCalls += other.numNotConfidentCalls;
numConfidentCalls += other.numConfidentCalls; numConfidentCalls += other.numConfidentCalls;
} }
@ -118,8 +119,8 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
uac.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; uac.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
if (mbq > 0) uac.MIN_BASE_QUALTY_SCORE = mbq; if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
if (deletions > 0) uac.MAX_DELETION_FRACTION = deletions; if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions;
snpEngine = new UnifiedGenotyperEngine(getToolkit(), uac); snpEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
// Adding the INDEL calling arguments for UG // Adding the INDEL calling arguments for UG
@ -142,38 +143,37 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
return counter; return counter;
VariantContext vcComp = tracker.getVariantContext(ref, compName, null, context.getLocation(), false); VariantContext vcComp = tracker.getVariantContext(ref, compName, null, context.getLocation(), false);
if( vcComp == null ) if( vcComp == null ){
return counter; return counter;
vcComp = vcComp.subContextFromGenotypes( vcComp.getGenotypes().values() ); }
if ((vcComp.getFilters().contains("TP") && vcComp.getFilters().contains("FP")) || (!vcComp.getFilters().contains("TP") && !vcComp.getFilters().contains("FP"))) if ((vcComp.getFilters().contains("TP") && vcComp.getFilters().contains("FP")) || (!vcComp.getFilters().contains("TP") && !vcComp.getFilters().contains("FP")))
throw new UserException.BadInput("Variant has the wrong filter annotation -- either missing FP/TP or has both. " + vcComp.getChr() + ":" + vcComp.getStart()); throw new UserException.BadInput("Variant has the wrong filter annotation -- either missing FP/TP or has both. " + vcComp.getChr() + ":" + vcComp.getStart());
VariantCallContext call; VariantCallContext call;
if ( vcComp.isSNP() ) if ( vcComp.isSNP() )
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context); call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
else if ( vcComp.isIndel() ) else if ( vcComp.isIndel() )
call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context); call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
else {
else logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles());
return counter; return counter;
}
VARIANT_TYPE variantType;
if (vcComp.getFilters().contains("TP"))
variantType = VARIANT_TYPE.TRUE_POSITIVE;
else
variantType = VARIANT_TYPE.FALSE_POSITIVE;
if (!call.confidentlyCalled) { if (!call.confidentlyCalled) {
if (context.hasReads()) {
counter.numNotConfidentCalls = 1L; counter.numNotConfidentCalls = 1L;
if (variantType == VARIANT_TYPE.TRUE_POSITIVE) if (vcComp.getFilters().contains("TP"))
counter.numFN = 1L; counter.numFN = 1L;
else else
counter.numTN = 1L; counter.numTN = 1L;
} }
else
counter.numUncovered = 1L;
}
else { else {
counter.numConfidentCalls = 1L; counter.numConfidentCalls = 1L;
if (variantType == VARIANT_TYPE.TRUE_POSITIVE) if (vcComp.getFilters().contains("TP"))
counter.numTP = 1L; counter.numTP = 1L;
else else
counter.numFP = 1L; counter.numFP = 1L;
@ -201,7 +201,8 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
logger.info("TP = " + reduceSum.numTP); logger.info("TP = " + reduceSum.numTP);
logger.info("TN = " + reduceSum.numTN); logger.info("TN = " + reduceSum.numTN);
logger.info("FP = " + reduceSum.numFP); logger.info("FP = " + reduceSum.numFP);
logger.info("FN = " + (reduceSum.numFN) ); logger.info("FN = " + reduceSum.numFN);
logger.info("Uncovered = " + reduceSum.numUncovered);
logger.info("confidently called = " + reduceSum.numConfidentCalls); logger.info("confidently called = " + reduceSum.numConfidentCalls);
logger.info("not confidently called = " + reduceSum.numNotConfidentCalls ); logger.info("not confidently called = " + reduceSum.numNotConfidentCalls );
} }