UG: small bug fix when creating empty variant contexts in UG for the -EMIT_ALL_SITES to allow indels.
GAV: First version of the walker that validates reads from a BAM file based on an annotated VCF with TP/FP annotations. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5396 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9384b2ff65
commit
02006954bc
|
|
@ -210,7 +210,7 @@ public class UnifiedGenotyperEngine {
|
||||||
private VariantContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
private VariantContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||||
VariantContext vc;
|
VariantContext vc;
|
||||||
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||||
final VariantContext vcInput = tracker.getVariantContext(ref, "alleles", null, ref.getLocus(), true);
|
final VariantContext vcInput = tracker.getVariantContext(ref, "alleles", null, ref.getLocus(), false);
|
||||||
if ( vcInput == null )
|
if ( vcInput == null )
|
||||||
return null;
|
return null;
|
||||||
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles());
|
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles());
|
||||||
|
|
|
||||||
|
|
@ -51,9 +51,14 @@ import java.util.*;
|
||||||
* @help.summary Validates the calls on a ROD track using a BAM dataset.
|
* @help.summary Validates the calls on a ROD track using a BAM dataset.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//@Requires(value={DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_ORDERED_DATA})
|
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="alleles",type=VariantContext.class))
|
||||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||||
|
|
||||||
|
// Ugly fix because RodWalkers don't have access to reads
|
||||||
|
@By(DataSource.REFERENCE)
|
||||||
|
@Reference(window=@Window(start=-200,stop=200))
|
||||||
|
|
||||||
|
|
||||||
public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalker.CountedData, GenotypeAndValidateWalker.CountedData> {
|
public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalker.CountedData, GenotypeAndValidateWalker.CountedData> {
|
||||||
|
|
||||||
@Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false)
|
@Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false)
|
||||||
|
|
@ -70,17 +75,12 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
||||||
private UnifiedGenotyperEngine snpEngine;
|
private UnifiedGenotyperEngine snpEngine;
|
||||||
private UnifiedGenotyperEngine indelEngine;
|
private UnifiedGenotyperEngine indelEngine;
|
||||||
|
|
||||||
|
|
||||||
public static enum VARIANT_TYPE {
|
|
||||||
TRUE_POSITIVE,
|
|
||||||
FALSE_POSITIVE,
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class CountedData {
|
public static class CountedData {
|
||||||
private long numTP = 0L;
|
private long numTP = 0L;
|
||||||
private long numTN = 0L;
|
private long numTN = 0L;
|
||||||
private long numFP = 0L;
|
private long numFP = 0L;
|
||||||
private long numFN = 0L;
|
private long numFN = 0L;
|
||||||
|
private long numUncovered = 0L;
|
||||||
private long numConfidentCalls = 0L;
|
private long numConfidentCalls = 0L;
|
||||||
private long numNotConfidentCalls = 0L;
|
private long numNotConfidentCalls = 0L;
|
||||||
|
|
||||||
|
|
@ -93,6 +93,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
||||||
numTN += other.numTN;
|
numTN += other.numTN;
|
||||||
numFP += other.numFP;
|
numFP += other.numFP;
|
||||||
numFN += other.numFN;
|
numFN += other.numFN;
|
||||||
|
numUncovered += other.numUncovered;
|
||||||
numNotConfidentCalls += other.numNotConfidentCalls;
|
numNotConfidentCalls += other.numNotConfidentCalls;
|
||||||
numConfidentCalls += other.numConfidentCalls;
|
numConfidentCalls += other.numConfidentCalls;
|
||||||
}
|
}
|
||||||
|
|
@ -118,8 +119,8 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
||||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||||
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
|
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
|
||||||
uac.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
uac.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||||
if (mbq > 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
|
if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
|
||||||
if (deletions > 0) uac.MAX_DELETION_FRACTION = deletions;
|
if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions;
|
||||||
snpEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
|
snpEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
|
||||||
|
|
||||||
// Adding the INDEL calling arguments for UG
|
// Adding the INDEL calling arguments for UG
|
||||||
|
|
@ -142,38 +143,37 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
||||||
return counter;
|
return counter;
|
||||||
|
|
||||||
VariantContext vcComp = tracker.getVariantContext(ref, compName, null, context.getLocation(), false);
|
VariantContext vcComp = tracker.getVariantContext(ref, compName, null, context.getLocation(), false);
|
||||||
if( vcComp == null )
|
if( vcComp == null ){
|
||||||
return counter;
|
return counter;
|
||||||
vcComp = vcComp.subContextFromGenotypes( vcComp.getGenotypes().values() );
|
}
|
||||||
|
|
||||||
if ((vcComp.getFilters().contains("TP") && vcComp.getFilters().contains("FP")) || (!vcComp.getFilters().contains("TP") && !vcComp.getFilters().contains("FP")))
|
if ((vcComp.getFilters().contains("TP") && vcComp.getFilters().contains("FP")) || (!vcComp.getFilters().contains("TP") && !vcComp.getFilters().contains("FP")))
|
||||||
throw new UserException.BadInput("Variant has the wrong filter annotation -- either missing FP/TP or has both. " + vcComp.getChr() + ":" + vcComp.getStart());
|
throw new UserException.BadInput("Variant has the wrong filter annotation -- either missing FP/TP or has both. " + vcComp.getChr() + ":" + vcComp.getStart());
|
||||||
|
|
||||||
VariantCallContext call;
|
VariantCallContext call;
|
||||||
if ( vcComp.isSNP() )
|
if ( vcComp.isSNP() )
|
||||||
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
||||||
|
|
||||||
else if ( vcComp.isIndel() )
|
else if ( vcComp.isIndel() )
|
||||||
call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
||||||
|
else {
|
||||||
else
|
logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles());
|
||||||
return counter;
|
return counter;
|
||||||
|
}
|
||||||
VARIANT_TYPE variantType;
|
|
||||||
if (vcComp.getFilters().contains("TP"))
|
|
||||||
variantType = VARIANT_TYPE.TRUE_POSITIVE;
|
|
||||||
else
|
|
||||||
variantType = VARIANT_TYPE.FALSE_POSITIVE;
|
|
||||||
|
|
||||||
if (!call.confidentlyCalled) {
|
if (!call.confidentlyCalled) {
|
||||||
|
if (context.hasReads()) {
|
||||||
counter.numNotConfidentCalls = 1L;
|
counter.numNotConfidentCalls = 1L;
|
||||||
if (variantType == VARIANT_TYPE.TRUE_POSITIVE)
|
if (vcComp.getFilters().contains("TP"))
|
||||||
counter.numFN = 1L;
|
counter.numFN = 1L;
|
||||||
else
|
else
|
||||||
counter.numTN = 1L;
|
counter.numTN = 1L;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
counter.numUncovered = 1L;
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
counter.numConfidentCalls = 1L;
|
counter.numConfidentCalls = 1L;
|
||||||
if (variantType == VARIANT_TYPE.TRUE_POSITIVE)
|
if (vcComp.getFilters().contains("TP"))
|
||||||
counter.numTP = 1L;
|
counter.numTP = 1L;
|
||||||
else
|
else
|
||||||
counter.numFP = 1L;
|
counter.numFP = 1L;
|
||||||
|
|
@ -201,7 +201,8 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
||||||
logger.info("TP = " + reduceSum.numTP);
|
logger.info("TP = " + reduceSum.numTP);
|
||||||
logger.info("TN = " + reduceSum.numTN);
|
logger.info("TN = " + reduceSum.numTN);
|
||||||
logger.info("FP = " + reduceSum.numFP);
|
logger.info("FP = " + reduceSum.numFP);
|
||||||
logger.info("FN = " + (reduceSum.numFN) );
|
logger.info("FN = " + reduceSum.numFN);
|
||||||
|
logger.info("Uncovered = " + reduceSum.numUncovered);
|
||||||
logger.info("confidently called = " + reduceSum.numConfidentCalls);
|
logger.info("confidently called = " + reduceSum.numConfidentCalls);
|
||||||
logger.info("not confidently called = " + reduceSum.numNotConfidentCalls );
|
logger.info("not confidently called = " + reduceSum.numNotConfidentCalls );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue