Misc. improvements to the LMW code. Support for emitting all sites, regardless of genotype. Min and max quality scores.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2411 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-12-19 23:20:57 +00:00
parent 7e0f69dab5
commit 9bf2d12c64
1 changed files with 41 additions and 15 deletions

View File

@ -19,8 +19,8 @@ import java.util.*;
*/ */
@By(DataSource.REFERENCE) @By(DataSource.REFERENCE)
public class LocusMismatchWalker extends LocusWalker<String,Integer> implements TreeReducible<Integer> { public class LocusMismatchWalker extends LocusWalker<String,Integer> implements TreeReducible<Integer> {
@Argument(fullName="confidentRefThreshold",doc="Set the lod score that defines confidence in ref, defaults to 4", required=false) //@Argument(fullName="confidentRefThreshold",doc="Set the lod score that defines confidence in ref, defaults to 4", required=false)
int confidentRefThreshold = 5; //int confidentRefThreshold = 5;
@Argument(fullName="maxNumMismatches",doc="Set the maximum number of mismatches at a locus before choosing not to use it in calculation. Defaults to 1.", required=false) @Argument(fullName="maxNumMismatches",doc="Set the maximum number of mismatches at a locus before choosing not to use it in calculation. Defaults to 1.", required=false)
int maxNumMismatches = 100; int maxNumMismatches = 100;
@Argument(fullName="minMappingQuality", doc ="Set the alignment quality below which to ignore reads; defaults to 30", required = false) @Argument(fullName="minMappingQuality", doc ="Set the alignment quality below which to ignore reads; defaults to 30", required = false)
@ -31,6 +31,8 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
int maxDepth = 100; int maxDepth = 100;
@Argument(fullName="minBaseQuality", doc = "Set the base quality score below which to ignore bases in the pileup, defaults to 20", required = false) @Argument(fullName="minBaseQuality", doc = "Set the base quality score below which to ignore bases in the pileup, defaults to 20", required = false)
int minQualityScore = 1; int minQualityScore = 1;
@Argument(fullName="maxBaseQuality", doc = "Set the base quality score below which to ignore bases in the pileup, defaults to no restriction", required = false)
int maxQualityScore = 99;
@Argument(fullName="minMismatches", doc = "Minimum number of mismatches at a locus before a site is displayed", required = false) @Argument(fullName="minMismatches", doc = "Minimum number of mismatches at a locus before a site is displayed", required = false)
int minMismatches = 1; int minMismatches = 1;
@ -46,6 +48,9 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
uac.baseModel = BaseMismatchModel.THREE_STATE; uac.baseModel = BaseMismatchModel.THREE_STATE;
uac.ALL_BASES = true; uac.ALL_BASES = true;
ug.setUnifiedArgumentCollection(uac); ug.setUnifiedArgumentCollection(uac);
// print the header
out.printf("loc ref genotype genotypeQ depth nMM qSumMM A C G T%n");
} }
public String map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { public String map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
@ -53,7 +58,9 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
ReadBackedPileup pileup = context.getPileup(); ReadBackedPileup pileup = context.getPileup();
if ( locusIsUsable(tracker, ref, pileup, context) ) { if ( locusIsUsable(tracker, ref, pileup, context) ) {
result = errorCounts( ref, pileup ); Genotype g = getGenotype(tracker, ref, context);
if ( g != null && g.isPointGenotype() )
result = errorCounts( ref, pileup, g );
} }
return result; return result;
@ -73,11 +80,10 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
} }
public Integer reduceInit() { public Integer reduceInit() {
out.printf("loc ref depth nMM qSumMM A C G T%n");
return 1; return 1;
} }
private String errorCounts( ReferenceContext ref, ReadBackedPileup pileup ) { private String errorCounts( ReferenceContext ref, ReadBackedPileup pileup, Genotype g ) {
int[] baseCounts = { 0, 0, 0, 0 }; int[] baseCounts = { 0, 0, 0, 0 };
int usableDepth = 0; int usableDepth = 0;
int nMismatches = 0; int nMismatches = 0;
@ -100,18 +106,28 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
for ( char b : BaseUtils.BASES ) { for ( char b : BaseUtils.BASES ) {
baseCountString += baseCounts[BaseUtils.simpleBaseToBaseIndex(b)] + " "; baseCountString += baseCounts[BaseUtils.simpleBaseToBaseIndex(b)] + " ";
} }
return String.format("%s %c %d %d %d %s", pileup.getLocation(), ref.getBase(), usableDepth, nMismatches, qSumMismatches, baseCountString); return String.format("%s %c %10s %5.2f %d %d %d %s",
pileup.getLocation(), ref.getBase(),
getGenotypeClass(g, ref.getBase()), 10 * g.getNegLog10PError(),
usableDepth, nMismatches, qSumMismatches, baseCountString);
} }
return null; return null;
} }
private String getGenotypeClass(Genotype g, char ref) {
if ( ! g.isVariant(ref) ) return "HOM-REF";
else if ( g.isHet() ) return "HET";
else if ( g.isHom() ) return "HOM-NONREF";
else throw new StingException("Unexpected genotype in getGenotypeClass " + g);
}
public boolean useRead( PileupElement e ) { public boolean useRead( PileupElement e ) {
if ( e.getRead().getMappingQuality() <= minMappingQuality ) { if ( e.getRead().getMappingQuality() <= minMappingQuality ) {
return false; return false;
} else if ( ! BaseUtils.isRegularBase( e.getBase() ) ) { } else if ( ! BaseUtils.isRegularBase( e.getBase() ) ) {
return false; return false;
} else if ( e.getQual() <= minQualityScore ) { } else if ( e.getQual() <= minQualityScore || e.getQual() > maxQualityScore ) {
return false; return false;
} else { } else {
return true; return true;
@ -122,8 +138,9 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
return BaseUtils.isRegularBase(ref.getBase()) && return BaseUtils.isRegularBase(ref.getBase()) &&
pileup.size() >= minDepth && pileup.size() < maxDepth && pileup.size() >= minDepth && pileup.size() < maxDepth &&
notCoveredByVariations(tracker) && notCoveredByVariations(tracker) &&
pileupContainsNoNs(pileup) && pileupContainsNoNs(pileup);
baseIsConfidentRef(tracker,ref,context); // pileupContainsNoNs(pileup) &&
// baseIsConfidentRef(tracker,ref,context);
} }
private boolean notCoveredByVariations( RefMetaDataTracker tracker ) { private boolean notCoveredByVariations( RefMetaDataTracker tracker ) {
@ -147,17 +164,26 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
return true; return true;
} }
private boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { private Genotype getGenotype( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
Pair<VariationCall, List<Genotype>> calls = ug.map(tracker,ref,context); Pair<VariationCall, List<Genotype>> calls = ug.map(tracker,ref,context);
if ( calls == null || calls.first == null) if ( calls == null || calls.first == null || calls.second == null )
return false; return null;
else { else {
VariationCall var = calls.getFirst(); return calls.second.get(0);
return var.isReference() && var.getNegLog10PError() > confidentRefThreshold;
//return ( var.isReference() > 0 && !calls.second.get(0).isVariant(ref.getBase()) && calls.second.get(0).getNegLog10PError() > confidentRefThreshold );
} }
} }
// private boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
// Pair<VariationCall, List<Genotype>> calls = ug.map(tracker,ref,context);
// if ( calls == null || calls.first == null)
// return false;
// else {
// VariationCall var = calls.getFirst();
// return var.isReference() && var.getNegLog10PError() > confidentRefThreshold;
// //return ( var.isReference() > 0 && !calls.second.get(0).isVariant(ref.getBase()) && calls.second.get(0).getNegLog10PError() > confidentRefThreshold );
// }
// }
public void onTraversalDone(Integer result) { public void onTraversalDone(Integer result) {
; ;
} }