From 9bf2d12c648f15671f6ee551fcd7b738d33ff56b Mon Sep 17 00:00:00 2001 From: depristo Date: Sat, 19 Dec 2009 23:20:57 +0000 Subject: [PATCH] Misc. improvements to the LMW code. Support for emitting all sites, regardless of genotype. Min and max quality scores. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2411 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/LocusMismatchWalker.java | 56 ++++++++++++++----- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java index cbd9f312c..8837bace0 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java @@ -19,8 +19,8 @@ import java.util.*; */ @By(DataSource.REFERENCE) public class LocusMismatchWalker extends LocusWalker implements TreeReducible { - @Argument(fullName="confidentRefThreshold",doc="Set the lod score that defines confidence in ref, defaults to 4", required=false) - int confidentRefThreshold = 5; + //@Argument(fullName="confidentRefThreshold",doc="Set the lod score that defines confidence in ref, defaults to 4", required=false) + //int confidentRefThreshold = 5; @Argument(fullName="maxNumMismatches",doc="Set the maximum number of mismatches at a locus before choosing not to use it in calculation. Defaults to 1.", required=false) int maxNumMismatches = 100; @Argument(fullName="minMappingQuality", doc ="Set the alignment quality below which to ignore reads; defaults to 30", required = false) @@ -31,6 +31,8 @@ public class LocusMismatchWalker extends LocusWalker implements int maxDepth = 100; @Argument(fullName="minBaseQuality", doc = "Set the base quality score below which to ignore bases in the pileup, defaults to 20", required = false) int minQualityScore = 1; + @Argument(fullName="maxBaseQuality", doc = "Set the base quality score below which to ignore bases in the pileup, defaults to no restriction", required = false) + int maxQualityScore = 99; @Argument(fullName="minMismatches", doc = "Minimum number of mismatches at a locus before a site is displayed", required = false) int minMismatches = 1; @@ -46,6 +48,9 @@ public class LocusMismatchWalker extends LocusWalker implements uac.baseModel = BaseMismatchModel.THREE_STATE; uac.ALL_BASES = true; ug.setUnifiedArgumentCollection(uac); + + // print the header + out.printf("loc ref genotype genotypeQ depth nMM qSumMM A C G T%n"); } public String map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { @@ -53,7 +58,9 @@ public class LocusMismatchWalker extends LocusWalker implements ReadBackedPileup pileup = context.getPileup(); if ( locusIsUsable(tracker, ref, pileup, context) ) { - result = errorCounts( ref, pileup ); + Genotype g = getGenotype(tracker, ref, context); + if ( g != null && g.isPointGenotype() ) + result = errorCounts( ref, pileup, g ); } return result; @@ -73,11 +80,10 @@ public class LocusMismatchWalker extends LocusWalker implements } public Integer reduceInit() { - out.printf("loc ref depth nMM qSumMM A C G T%n"); return 1; } - private String errorCounts( ReferenceContext ref, ReadBackedPileup pileup ) { + private String errorCounts( ReferenceContext ref, ReadBackedPileup pileup, Genotype g ) { int[] baseCounts = { 0, 0, 0, 0 }; int usableDepth = 0; int nMismatches = 0; @@ -100,18 +106,28 @@ public class LocusMismatchWalker extends LocusWalker implements for ( char b : BaseUtils.BASES ) { baseCountString += baseCounts[BaseUtils.simpleBaseToBaseIndex(b)] + " "; } - return String.format("%s %c %d %d %d %s", pileup.getLocation(), ref.getBase(), usableDepth, nMismatches, qSumMismatches, baseCountString); + return String.format("%s %c %10s %5.2f %d %d %d %s", + pileup.getLocation(), ref.getBase(), + getGenotypeClass(g, ref.getBase()), 10 * g.getNegLog10PError(), + usableDepth, nMismatches, qSumMismatches, baseCountString); } return null; } + private String getGenotypeClass(Genotype g, char ref) { + if ( ! g.isVariant(ref) ) return "HOM-REF"; + else if ( g.isHet() ) return "HET"; + else if ( g.isHom() ) return "HOM-NONREF"; + else throw new StingException("Unexpected genotype in getGenotypeClass " + g); + } + public boolean useRead( PileupElement e ) { if ( e.getRead().getMappingQuality() <= minMappingQuality ) { return false; } else if ( ! BaseUtils.isRegularBase( e.getBase() ) ) { return false; - } else if ( e.getQual() <= minQualityScore ) { + } else if ( e.getQual() <= minQualityScore || e.getQual() > maxQualityScore ) { return false; } else { return true; @@ -122,8 +138,9 @@ public class LocusMismatchWalker extends LocusWalker implements return BaseUtils.isRegularBase(ref.getBase()) && pileup.size() >= minDepth && pileup.size() < maxDepth && notCoveredByVariations(tracker) && - pileupContainsNoNs(pileup) && - baseIsConfidentRef(tracker,ref,context); + pileupContainsNoNs(pileup); +// pileupContainsNoNs(pileup) && +// baseIsConfidentRef(tracker,ref,context); } private boolean notCoveredByVariations( RefMetaDataTracker tracker ) { @@ -147,17 +164,26 @@ public class LocusMismatchWalker extends LocusWalker implements return true; } - private boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { + private Genotype getGenotype( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { Pair> calls = ug.map(tracker,ref,context); - if ( calls == null || calls.first == null) - return false; + if ( calls == null || calls.first == null || calls.second == null ) + return null; else { - VariationCall var = calls.getFirst(); - return var.isReference() && var.getNegLog10PError() > confidentRefThreshold; - //return ( var.isReference() > 0 && !calls.second.get(0).isVariant(ref.getBase()) && calls.second.get(0).getNegLog10PError() > confidentRefThreshold ); + return calls.second.get(0); } } +// private boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { +// Pair> calls = ug.map(tracker,ref,context); +// if ( calls == null || calls.first == null) +// return false; +// else { +// VariationCall var = calls.getFirst(); +// return var.isReference() && var.getNegLog10PError() > confidentRefThreshold; +// //return ( var.isReference() > 0 && !calls.second.get(0).isVariant(ref.getBase()) && calls.second.get(0).getNegLog10PError() > confidentRefThreshold ); +// } +// } + public void onTraversalDone(Integer result) { ; }