UG: small bug fix when creating empty variant contexts in UG for the -EMIT_ALL_SITES to allow indels.

GAV: First version of the walker that validates reads from a BAM file based on an annotated VCF with TP/FP annotations. 


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5396 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
carneiro 2011-03-07 22:51:04 +00:00
parent 9384b2ff65
commit 02006954bc
2 changed files with 28 additions and 27 deletions

View File

@ -210,7 +210,7 @@ public class UnifiedGenotyperEngine {
private VariantContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
VariantContext vc;
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext vcInput = tracker.getVariantContext(ref, "alleles", null, ref.getLocus(), true);
final VariantContext vcInput = tracker.getVariantContext(ref, "alleles", null, ref.getLocus(), false);
if ( vcInput == null )
return null;
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles());

View File

@ -51,9 +51,14 @@ import java.util.*;
* @help.summary Validates the calls on a ROD track using a BAM dataset.
*/
//@Requires(value={DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_ORDERED_DATA})
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="alleles",type=VariantContext.class))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
// Ugly fix because RodWalkers don't have access to reads
@By(DataSource.REFERENCE)
@Reference(window=@Window(start=-200,stop=200))
public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalker.CountedData, GenotypeAndValidateWalker.CountedData> {
@Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false)
@ -70,17 +75,12 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
private UnifiedGenotyperEngine snpEngine;
private UnifiedGenotyperEngine indelEngine;
public static enum VARIANT_TYPE {
TRUE_POSITIVE,
FALSE_POSITIVE,
}
public static class CountedData {
private long numTP = 0L;
private long numTN = 0L;
private long numFP = 0L;
private long numFN = 0L;
private long numUncovered = 0L;
private long numConfidentCalls = 0L;
private long numNotConfidentCalls = 0L;
@ -93,6 +93,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
numTN += other.numTN;
numFP += other.numFP;
numFN += other.numFN;
numUncovered += other.numUncovered;
numNotConfidentCalls += other.numNotConfidentCalls;
numConfidentCalls += other.numConfidentCalls;
}
@ -118,8 +119,8 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
uac.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
if (mbq > 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
if (deletions > 0) uac.MAX_DELETION_FRACTION = deletions;
if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions;
snpEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
// Adding the INDEL calling arguments for UG
@ -142,38 +143,37 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
return counter;
VariantContext vcComp = tracker.getVariantContext(ref, compName, null, context.getLocation(), false);
if( vcComp == null )
if( vcComp == null ){
return counter;
vcComp = vcComp.subContextFromGenotypes( vcComp.getGenotypes().values() );
}
if ((vcComp.getFilters().contains("TP") && vcComp.getFilters().contains("FP")) || (!vcComp.getFilters().contains("TP") && !vcComp.getFilters().contains("FP")))
throw new UserException.BadInput("Variant has the wrong filter annotation -- either missing FP/TP or has both. " + vcComp.getChr() + ":" + vcComp.getStart());
VariantCallContext call;
if ( vcComp.isSNP() )
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
else if ( vcComp.isIndel() )
call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
else
else {
logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles());
return counter;
VARIANT_TYPE variantType;
if (vcComp.getFilters().contains("TP"))
variantType = VARIANT_TYPE.TRUE_POSITIVE;
else
variantType = VARIANT_TYPE.FALSE_POSITIVE;
}
if (!call.confidentlyCalled) {
counter.numNotConfidentCalls = 1L;
if (variantType == VARIANT_TYPE.TRUE_POSITIVE)
counter.numFN = 1L;
if (context.hasReads()) {
counter.numNotConfidentCalls = 1L;
if (vcComp.getFilters().contains("TP"))
counter.numFN = 1L;
else
counter.numTN = 1L;
}
else
counter.numTN = 1L;
counter.numUncovered = 1L;
}
else {
counter.numConfidentCalls = 1L;
if (variantType == VARIANT_TYPE.TRUE_POSITIVE)
if (vcComp.getFilters().contains("TP"))
counter.numTP = 1L;
else
counter.numFP = 1L;
@ -201,7 +201,8 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
logger.info("TP = " + reduceSum.numTP);
logger.info("TN = " + reduceSum.numTN);
logger.info("FP = " + reduceSum.numFP);
logger.info("FN = " + (reduceSum.numFN) );
logger.info("FN = " + reduceSum.numFN);
logger.info("Uncovered = " + reduceSum.numUncovered);
logger.info("confidently called = " + reduceSum.numConfidentCalls);
logger.info("not confidently called = " + reduceSum.numNotConfidentCalls );
}