Walker for calculating non-independent base errors, v1. Will be moved to somewhere not in core
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2352 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1389ac6bdf
commit
1da97ebb85
|
|
@ -0,0 +1,155 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.coverage;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
@By(DataSource.REFERENCE)
|
||||
public class LocusMismatchWalker extends LocusWalker<String,Integer> implements TreeReducible<Integer> {
|
||||
@Argument(fullName="confidentRefThreshold",doc="Set the lod score that defines confidence in ref, defaults to 4", required=false)
|
||||
int confidentRefThreshold = 5;
|
||||
@Argument(fullName="maxNumMismatches",doc="Set the maximum number of mismatches at a locus before choosing not to use it in calculation. Defaults to 1.", required=false)
|
||||
int maxNumMismatches = 100;
|
||||
@Argument(fullName="minMappingQuality", doc ="Set the alignment quality below which to ignore reads; defaults to 30", required = false)
|
||||
int minMappingQuality = 1;
|
||||
@Argument(fullName="minDepth",doc="Set the minimum number of reads at a locus before choosing to use it in calculation. Defaults to 20.", required=false)
|
||||
int minDepth = 10;
|
||||
@Argument(fullName="maxDepth",doc="Set the minimum number of reads at a locus before choosing to use it in calculation. Defaults to 20.", required=false)
|
||||
int maxDepth = 100;
|
||||
@Argument(fullName="minBaseQuality", doc = "Set the base quality score below which to ignore bases in the pileup, defaults to 20", required = false)
|
||||
int minQualityScore = 1;
|
||||
@Argument(fullName="minMismatches", doc = "Minimum number of mismatches at a locus before a site is displayed", required = false)
|
||||
int minMismatches = 1;
|
||||
|
||||
private UnifiedGenotyper ug;
|
||||
|
||||
public void initialize() {
|
||||
ug = new UnifiedGenotyper();
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
ug.initialize();
|
||||
uac.baseModel = BaseMismatchModel.THREE_STATE;
|
||||
uac.ALL_BASES = true;
|
||||
ug.setUnifiedArgumentCollection(uac);
|
||||
}
|
||||
|
||||
public String map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
String result = null;
|
||||
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
if ( locusIsUsable(tracker, ref, pileup, context) ) {
|
||||
result = errorCounts( ref, pileup );
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public Integer reduce( String map, Integer reduce ) {
|
||||
if ( map != null )
|
||||
out.println(map);
|
||||
return reduce;
|
||||
}
|
||||
|
||||
public Integer treeReduce( Integer reduce1, Integer reduce2 ) {
|
||||
return reduce1 + reduce2;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
out.printf("loc ref depth nMM qSumMM A C G T%n");
|
||||
return null;
|
||||
}
|
||||
|
||||
private String errorCounts( ReferenceContext ref, ReadBackedPileup pileup ) {
|
||||
int[] baseCounts = { 0, 0, 0, 0 };
|
||||
int usableDepth = 0;
|
||||
int nMismatches = 0;
|
||||
int qSumMismatches = 0;
|
||||
|
||||
for ( PileupElement e : pileup ) {
|
||||
if ( useRead(e) ) {
|
||||
//System.out.printf("Using %s%n", e.getRead().getReadName());
|
||||
baseCounts[e.getBaseIndex()] += 1;
|
||||
usableDepth++;
|
||||
if ( ! BaseUtils.basesAreEqual(e.getBase(), (byte)ref.getBase()) ) {
|
||||
nMismatches++;
|
||||
qSumMismatches += e.getQual();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( nMismatches < maxNumMismatches && nMismatches >= minMismatches && usableDepth >= minDepth ) {
|
||||
String baseCountString = "";
|
||||
for ( char b : BaseUtils.BASES ) {
|
||||
baseCountString += baseCounts[BaseUtils.simpleBaseToBaseIndex(b)] + " ";
|
||||
}
|
||||
return String.format("%s %c %d %d %d %s", pileup.getLocation(), ref.getBase(), usableDepth, nMismatches, qSumMismatches, baseCountString);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean useRead( PileupElement e ) {
|
||||
if ( e.getRead().getMappingQuality() <= minMappingQuality ) {
|
||||
return false;
|
||||
} else if ( ! BaseUtils.isRegularBase( e.getBase() ) ) {
|
||||
return false;
|
||||
} else if ( e.getQual() <= minQualityScore ) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean locusIsUsable( RefMetaDataTracker tracker, ReferenceContext ref, ReadBackedPileup pileup, AlignmentContext context ) {
|
||||
return BaseUtils.isRegularBase(ref.getBase()) &&
|
||||
pileup.size() >= minDepth && pileup.size() < maxDepth &&
|
||||
notCoveredByVariations(tracker) &&
|
||||
pileupContainsNoNs(pileup) &&
|
||||
baseIsConfidentRef(tracker,ref,context);
|
||||
}
|
||||
|
||||
private boolean notCoveredByVariations( RefMetaDataTracker tracker ) {
|
||||
for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) {
|
||||
if ( datum instanceof Variation || datum instanceof Genotype ) {
|
||||
//System.out.printf("Ignoring site because of %s%n", datum);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean pileupContainsNoNs(ReadBackedPileup pileup) {
|
||||
for ( byte c : pileup.getBases() ) {
|
||||
if ( c == 'N' ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
Pair<VariationCall, List<Genotype>> calls = ug.map(tracker,ref,context);
|
||||
if ( calls == null || calls.first == null)
|
||||
return false;
|
||||
else {
|
||||
VariationCall var = calls.getFirst();
|
||||
return var.isReference() && var.getNegLog10PError() > confidentRefThreshold;
|
||||
//return ( var.isReference() > 0 && !calls.second.get(0).isVariant(ref.getBase()) && calls.second.get(0).getNegLog10PError() > confidentRefThreshold );
|
||||
}
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
/**
|
||||
* Totally experimental tools for working with a graphical reference (i.e., one that explicitly represents variation).
|
||||
* Not reliable, complete, or even optimized. Purely for initial evaluation of the approach
|
||||
*/
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.graphalign;
|
||||
|
|
@ -38,6 +38,10 @@ public class PileupElement {
|
|||
return isDeletion() ? DELETION_BASE : read.getReadBases()[offset];
|
||||
}
|
||||
|
||||
public int getBaseIndex() {
|
||||
return isDeletion() ? DELETION_BASE : BaseUtils.simpleBaseToBaseIndex((char)read.getReadBases()[offset]);
|
||||
}
|
||||
|
||||
public byte getSecondBase() {
|
||||
return isDeletion() ? DELETION_BASE : BaseUtils.getSecondBase(read, offset);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue