From 497a1b059ef906d2f085b77a88732ff9eeca03d3 Mon Sep 17 00:00:00 2001 From: Andrey Sivachenko Date: Wed, 7 Mar 2012 18:34:11 -0500 Subject: [PATCH] transition to JEXL completed, old parameters setting individual cutoffs now deprecated --- .../indels/SomaticIndelDetectorWalker.java | 155 ++++++++++-------- 1 file changed, 90 insertions(+), 65 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 4247ab7a2..733d32e3d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -151,30 +151,44 @@ public class SomaticIndelDetectorWalker extends ReadWalker { doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false) java.io.File bedOutput = null; + @Deprecated @Argument(fullName="minCoverage", shortName="minCoverage", doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+ - "with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false) + "with --unpaired (single sample) option, this value is used for minimum sample coverage. "+ + "INSTEAD USE: T_COV { String RefseqFileName = null; - @Argument(shortName="filter", doc="One or more criteria to use when selecting the data", required=false) + @Argument(shortName="filter", doc="One or more logical expressions. If any of the expressions is TRUE, " + + "putative indel will be discarded and nothing will be printed into the output (unless genotyping "+ + "at the specific position is explicitly requested, see -genotype). "+ + "Default: T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7", required=false) public ArrayList FILTER_EXPRESSIONS = new ArrayList(); //@Argument(fullName="blacklistedLanes", shortName="BL", @@ -425,6 +442,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker { refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile); // Now initialize JEXL expressions: + if ( FILTER_EXPRESSIONS.size() == 0 ) { + if ( call_unpaired ) { + FILTER_EXPRESSIONS.add("COV<6||INDEL_F<0.3||INDEL_CF<0.7"); + } else { + FILTER_EXPRESSIONS.add("T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7"); + } + } for ( String s : FILTER_EXPRESSIONS ) { try { Expression e = jexlEngine.createExpression(s); @@ -706,14 +730,26 @@ public class SomaticIndelDetectorWalker extends ReadWalker { if ( normal_context.indelsAt(pos).size() == 0 && ! genotype ) continue; IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH); + JexlContext jc = new MapContext(); + normalCall.fillContext(jc,singleMetricsCassette); + boolean discard_event = false; - if ( normalCall.getCoverage() < minCoverage && ! genotype ) { - if ( DEBUG ) { - System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); - } - continue; // low coverage + for ( Expression e : jexlExpressions ) { + if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; } } + if ( discard_event && ! genotype ) { + normal_context.indelsAt(pos).clear(); + continue; //* + } + +// if ( normalCall.getCoverage() < minCoverage && ! genotype ) { +// if ( DEBUG ) { +// System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); +// } +// continue; // low coverage +// } + if ( DEBUG ) System.out.println("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" at "+pos); long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() ); @@ -742,24 +778,16 @@ public class SomaticIndelDetectorWalker extends ReadWalker { location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(), pos); - boolean haveCall = normalCall.isCall(); // cache the value - - if ( haveCall || genotype) { - if ( haveCall ) normalCallsMade++; - printVCFLine(vcf_writer,normalCall); - if ( bedWriter != null ) normalCall.printBedLine(bedWriter); - if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall); - lastGenotypedPosition = pos; - } + if ( ! discard_event ) normalCallsMade++; + printVCFLine(vcf_writer,normalCall, discard_event); + if ( bedWriter != null ) normalCall.printBedLine(bedWriter); + if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, discard_event); + lastGenotypedPosition = pos; normal_context.indelsAt(pos).clear(); // we dealt with this indel; don't want to see it again // (we might otherwise in the case when 1) there is another indel that follows // within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel) - -// for ( IndelVariant var : variants ) { -// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount()); -// } } if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")"); @@ -877,24 +905,29 @@ public class SomaticIndelDetectorWalker extends ReadWalker { JexlContext jc = new MapContext(); tumorCall.fillContext(jc,tumorMetricsCassette); normalCall.fillContext(jc,normalMetricsCassette); - boolean result = false; + boolean discard_event = false; for ( Expression e : jexlExpressions ) { - if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { result=true; break; } + if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; } } - if ( tumorCall.getCoverage() < minCoverage && ! genotype ) { - if ( DEBUG ) { - System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)"); - } - continue; // low coverage - } - if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) { - if ( DEBUG ) { - System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); - } - continue; // low coverage + if ( discard_event && ! genotype ) { + tumor_context.indelsAt(pos).clear(); + normal_context.indelsAt(pos).clear(); + continue; //* } +// if ( tumorCall.getCoverage() < minCoverage && ! genotype ) { +// if ( DEBUG ) { +// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)"); +// } +// continue; // low coverage +// } +// if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) { +// if ( DEBUG ) { +// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); +// } +// continue; // low coverage +// } if ( DEBUG ) { System.out.print("DEBUG>> "+(tumorCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in tumor, "); @@ -922,32 +955,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker { if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right -// location = getToolkit().getGenomeLocParser().setStart(location,pos); -// location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data - location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(),pos); // retrieve annotation data - boolean haveCall = tumorCall.isCall(); // cache the value +// boolean haveCall = tumorCall.isCall(); // cache the value - if ( haveCall || genotype ) { - if ( haveCall ) tumorCallsMade++; + if ( ! discard_event ) tumorCallsMade++; - printVCFLine(vcf_writer,normalCall,tumorCall); + printVCFLine(vcf_writer,normalCall,tumorCall,discard_event); - if ( bedWriter != null ) tumorCall.printBedLine(bedWriter); + if ( bedWriter != null ) tumorCall.printBedLine(bedWriter); + + if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall, discard_event ); + lastGenotypedPosition = pos; - if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall ); - lastGenotypedPosition = pos; - } tumor_context.indelsAt(pos).clear(); normal_context.indelsAt(pos).clear(); // we dealt with this indel; don't want to see it again // (we might otherwise in the case when 1) there is another indel that follows // within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel) - -// for ( IndelVariant var : variants ) { -// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount()); -// } } if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")"); @@ -1001,14 +1026,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } - public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall) { + public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, boolean discard_event) { RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location)); String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList)); StringBuilder fullRecord = new StringBuilder(); fullRecord.append(makeFullRecord(normalCall)); fullRecord.append(annotationString); - if ( ! normalCall.isCall() && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); + if ( discard_event && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); try { verboseWriter.write(fullRecord.toString()); verboseWriter.write('\n'); @@ -1019,7 +1044,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } - public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall) { + public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall, boolean discard_event) { RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location)); String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList)); @@ -1067,7 +1092,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { fullRecord.append('\t'); fullRecord.append(annotationString); - if ( ! tumorCall.isCall() && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); + if ( discard_event && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); try { verboseWriter.write(fullRecord.toString()); @@ -1077,7 +1102,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } } - public void printVCFLine(VCFWriter vcf, IndelPrecall call) { + public void printVCFLine(VCFWriter vcf, IndelPrecall call, boolean discard_event) { long start = call.getPosition()-1; // If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed. @@ -1114,14 +1139,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker { Map attrs = call.makeStatsAttributes(null); - if ( call.isCall() ) // we made a call - put actual het genotype here: + if ( ! discard_event ) // we made a call - put actual het genotype here: genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false)); else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all) genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false)); } Set filters = null; - if ( call.getVariant() != null && ! call.isCall() ) { + if ( call.getVariant() != null && discard_event ) { filters = new HashSet(); filters.add("NoCall"); } @@ -1149,7 +1174,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } } - public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall) { + public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall, boolean discard_event) { long start = tCall.getPosition()-1; long stop = start; @@ -1166,7 +1191,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { Map attrs = new HashMap(); boolean isSomatic = false; - if ( nCall.getCoverage() >= minNormalCoverage && nCall.getVariant() == null && tCall.getVariant() != null ) { + if ( nCall.getVariant() == null && tCall.getVariant() != null ) { isSomatic = true; attrs.put(VCFConstants.SOMATIC_KEY,true); } @@ -1209,7 +1234,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } Set filters = null; - if ( tCall.getVariant() != null && ! tCall.isCall() ) { + if ( tCall.getVariant() != null && discard_event ) { filters = new HashSet(); filters.add("NoCall"); } @@ -1662,7 +1687,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { context.set(cassette[C_COV],total_coverage); context.set(cassette[C_CONS_CNT],consensus_indel_count); } - +/* public boolean isCall() { boolean ret = ( consensus_indel_count >= minIndelCount && (double)consensus_indel_count > minFraction * total_coverage && @@ -1675,7 +1700,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { return ret; // return true; } - +*/ /** Utility method: finds the indel variant with the largest count (ie consensus) among all the observed * variants, and sets the counts of consensus observations and all observations of any indels (including non-consensus) * @param variants