lots of changes to facilitate calling indels and 1kG
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@666 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
add7b6cf65
commit
313a6d0fb5
|
|
@ -5,7 +5,9 @@ import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
|
||||||
import org.broadinstitute.sting.playground.utils.*;
|
import org.broadinstitute.sting.playground.utils.*;
|
||||||
|
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods.IndelCall;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.BasicPileup;
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
@ -18,26 +20,32 @@ import java.util.*;
|
||||||
// Draft single sample genotyper
|
// Draft single sample genotyper
|
||||||
// j.maguire 3-7-2009
|
// j.maguire 3-7-2009
|
||||||
|
|
||||||
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, Integer> {
|
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||||
|
{
|
||||||
@Argument(fullName="metrics", shortName="met", doc="metrics", required=false) public String metricsFileName = "/dev/null";
|
@Argument(fullName="metrics", shortName="met", doc="metrics", required=false) public String metricsFileName = "/dev/null";
|
||||||
@Argument(fullName="metInterval", shortName="mi", doc="metInterval", required=false) public Integer metricsInterval = 50000;
|
@Argument(fullName="metInterval", shortName="mi", doc="metInterval", required=false) public Integer metricsInterval = 50000;
|
||||||
@Argument(fullName="printMetrics", shortName="printMetrics", doc="printMetrics", required=false) public Boolean printMetrics = true;
|
@Argument(fullName="printMetrics", shortName="printMetrics", doc="printMetrics", required=false) public Boolean printMetrics = true;
|
||||||
@Argument(fullName="lodThreshold", shortName="lod", doc="lodThreshold", required=false) public Double lodThreshold = 5.0;
|
@Argument(fullName="lodThreshold", shortName="lod", doc="lodThreshold", required=false) public Double lodThreshold = 5.0;
|
||||||
@Argument(fullName="fourBaseMode", shortName="fb", doc="fourBaseMode", required=false) public Boolean fourBaseMode = false;
|
@Argument(fullName="fourBaseMode", shortName="fb", doc="fourBaseMode", required=false) public Boolean fourBaseMode = false;
|
||||||
@Argument(fullName="retest", shortName="re", doc="retest", required=false) public Boolean retest = false;
|
@Argument(fullName="retest", shortName="re", doc="retest", required=false) public Boolean retest = false;
|
||||||
|
@Argument(fullName="call_indels", shortName="call_indels", doc="Call Indels", required=false) public Boolean call_indels = false;
|
||||||
@Argument(fullName="qHom", shortName="qHom", doc="qHom", required=false) public Double qHom = 0.04;
|
@Argument(fullName="qHom", shortName="qHom", doc="qHom", required=false) public Double qHom = 0.04;
|
||||||
@Argument(fullName="qHet", shortName="qHet", doc="qHet", required=false) public Double qHet = 0.49;
|
@Argument(fullName="qHet", shortName="qHet", doc="qHet", required=false) public Double qHet = 0.49;
|
||||||
@Argument(fullName="qHomNonRef", shortName="qHomNonRef", doc="qHomNonRef", required=false) public Double qHomNonRef = 0.97;
|
@Argument(fullName="qHomNonRef", shortName="qHomNonRef", doc="qHomNonRef", required=false) public Double qHomNonRef = 0.97;
|
||||||
|
|
||||||
public AlleleMetrics metrics;
|
public AlleleMetrics metrics;
|
||||||
|
|
||||||
|
public String sample_name;
|
||||||
|
|
||||||
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) { return true; }
|
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) { return true; }
|
||||||
public boolean requiresReads() { return true; }
|
public boolean requiresReads() { return true; }
|
||||||
public void initialize() { metrics = new AlleleMetrics(metricsFileName, lodThreshold); }
|
public void initialize() { metrics = new AlleleMetrics(metricsFileName, lodThreshold); sample_name = null; }
|
||||||
|
|
||||||
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
String rodString = getRodString(tracker);
|
String rodString = getRodString(tracker);
|
||||||
|
|
||||||
|
if (ref == 'N') { return null; }
|
||||||
|
|
||||||
/*
|
/*
|
||||||
AlleleFrequencyEstimate freq;
|
AlleleFrequencyEstimate freq;
|
||||||
if (fourBaseMode) {
|
if (fourBaseMode) {
|
||||||
|
|
@ -57,8 +65,21 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
freq = getOneProbAlleleFrequency(ref, context, rodString);
|
freq = getOneProbAlleleFrequency(ref, context, rodString);
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
if (context.getReads().size() != 0)
|
||||||
|
{
|
||||||
|
SAMRecord read = context.getReads().get(0);
|
||||||
|
String RG = (String)(read.getAttribute("RG"));
|
||||||
|
String local_sample_name = read.getHeader().getReadGroup(RG).getSample();
|
||||||
|
if (sample_name == null) { sample_name = local_sample_name; }
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (! sample_name.equals(local_sample_name)) { System.out.printf("SAMPLE NAME MIXUP: %s vs. %s\n", sample_name, local_sample_name); }
|
||||||
|
assert(sample_name.equals(local_sample_name));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
AlleleFrequencyEstimate freq = getOneProbAlleleFrequency(ref, context, rodString);
|
AlleleFrequencyEstimate freq = getOneProbAlleleFrequency(ref, context, rodString, sample_name);
|
||||||
|
|
||||||
if (printMetrics) {
|
if (printMetrics) {
|
||||||
if (freq != null) { metrics.nextPosition(freq, tracker); }
|
if (freq != null) { metrics.nextPosition(freq, tracker); }
|
||||||
|
|
@ -151,7 +172,8 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
probs.length,
|
probs.length,
|
||||||
ReadBackedPileup.basePileupAsString(context.getReads(), context.getOffsets()),
|
ReadBackedPileup.basePileupAsString(context.getReads(), context.getOffsets()),
|
||||||
probs,
|
probs,
|
||||||
posteriors);
|
posteriors,
|
||||||
|
"unknown_sample");
|
||||||
|
|
||||||
return freq;
|
return freq;
|
||||||
}
|
}
|
||||||
|
|
@ -250,14 +272,29 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
return secondaryBaseCounts;
|
return secondaryBaseCounts;
|
||||||
}
|
}
|
||||||
|
|
||||||
private AlleleFrequencyEstimate getOneProbAlleleFrequency(char ref, LocusContext context, String rodString) {
|
private AlleleFrequencyEstimate getOneProbAlleleFrequency(char ref, LocusContext context, String rodString, String sample_name) {
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
String bases = pileup.getBases();
|
String bases = pileup.getBases();
|
||||||
|
|
||||||
List<SAMRecord> reads = context.getReads();
|
List<SAMRecord> reads = context.getReads();
|
||||||
List<Integer> offsets = context.getOffsets();
|
List<Integer> offsets = context.getOffsets();
|
||||||
ref = Character.toUpperCase(ref);
|
ref = Character.toUpperCase(ref);
|
||||||
|
|
||||||
|
// Handle indels.
|
||||||
|
if (call_indels)
|
||||||
|
{
|
||||||
|
String[] indels = BasicPileup.indelPileup(reads, offsets);
|
||||||
|
IndelCall indel_call = GenotypeLikelihoods.callIndel(indels);
|
||||||
|
if (indel_call != null)
|
||||||
|
{
|
||||||
|
if (! indel_call.type.equals("ref"))
|
||||||
|
{
|
||||||
|
System.out.printf("INDEL %s %s\n", context.getLocation(), indel_call);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle single-base polymorphisms.
|
||||||
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
||||||
for ( int i = 0; i < reads.size(); i++ )
|
for ( int i = 0; i < reads.size(); i++ )
|
||||||
{
|
{
|
||||||
|
|
@ -272,7 +309,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
G.applyFourBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
G.applyFourBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
||||||
}
|
}
|
||||||
|
|
||||||
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods);
|
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods, sample_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getRodString(RefMetaDataTracker tracker) {
|
private String getRodString(RefMetaDataTracker tracker) {
|
||||||
|
|
@ -300,6 +337,83 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Given result of map function
|
// Given result of map function
|
||||||
public Integer reduceInit() { return 0; }
|
private String confident_ref_interval_contig = "";
|
||||||
public Integer reduce(AlleleFrequencyEstimate value, Integer sum) { return 0; }
|
private long confident_ref_interval_start = 0;
|
||||||
|
private double confident_ref_interval_LOD_sum = 0;
|
||||||
|
private double confident_ref_interval_length = 0;
|
||||||
|
private long last_position_considered = -1;
|
||||||
|
private boolean inside_confident_ref_interval = false;
|
||||||
|
|
||||||
|
public String reduceInit()
|
||||||
|
{
|
||||||
|
confident_ref_interval_contig = "";
|
||||||
|
confident_ref_interval_start = 0;
|
||||||
|
confident_ref_interval_LOD_sum = 0;
|
||||||
|
confident_ref_interval_length = 0;
|
||||||
|
last_position_considered = -1;
|
||||||
|
inside_confident_ref_interval = false;
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
|
||||||
|
{
|
||||||
|
// Print RESULT data for confident calls
|
||||||
|
|
||||||
|
long current_offset = alleleFreq.location.getStart(); //Integer.parseInt(tokens[1]);
|
||||||
|
|
||||||
|
if (inside_confident_ref_interval &&
|
||||||
|
((alleleFreq.lodVsRef > -5.0) || (current_offset != last_position_considered + 1)) )
|
||||||
|
{
|
||||||
|
// No longer hom-ref, so output a ref line.
|
||||||
|
double lod = confident_ref_interval_LOD_sum / confident_ref_interval_length;
|
||||||
|
|
||||||
|
out.format("%s\tCALLER\tREFERENCE\t%d\t%d\t%f\t.\t.\tLENGTH %d\n",
|
||||||
|
confident_ref_interval_contig,
|
||||||
|
confident_ref_interval_start,
|
||||||
|
last_position_considered,
|
||||||
|
lod,
|
||||||
|
(int)(confident_ref_interval_length));
|
||||||
|
|
||||||
|
inside_confident_ref_interval = false;
|
||||||
|
}
|
||||||
|
else if (inside_confident_ref_interval && (alleleFreq.lodVsRef <= -5.0))
|
||||||
|
{
|
||||||
|
// Still hom-ref so increment the counters.
|
||||||
|
confident_ref_interval_LOD_sum += alleleFreq.lodVsRef;
|
||||||
|
confident_ref_interval_length += 1;
|
||||||
|
}
|
||||||
|
else if ((!inside_confident_ref_interval) && (alleleFreq.lodVsRef > -5.0))
|
||||||
|
{
|
||||||
|
// do nothing.
|
||||||
|
}
|
||||||
|
else if ((!inside_confident_ref_interval) && (alleleFreq.lodVsRef <= -5.0))
|
||||||
|
{
|
||||||
|
// We moved into a hom-ref region so start a new interval.
|
||||||
|
confident_ref_interval_contig = alleleFreq.location.getContig();
|
||||||
|
confident_ref_interval_start = alleleFreq.location.getStart();
|
||||||
|
confident_ref_interval_LOD_sum = alleleFreq.lodVsRef;
|
||||||
|
confident_ref_interval_length = 1;
|
||||||
|
inside_confident_ref_interval = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
last_position_considered = current_offset;
|
||||||
|
|
||||||
|
if (alleleFreq.lodVsRef >= 5) {
|
||||||
|
out.print(alleleFreq.asGFFString());
|
||||||
|
|
||||||
|
/*
|
||||||
|
String gtype = genotypeTypeString(alleleFreq.qstar, alleleFreq.N);
|
||||||
|
System.out.print("DEBUG " + gtype + " ");
|
||||||
|
if (gtype.contentEquals("het")) {
|
||||||
|
System.out.println(alleleFreq.ref + "" + alleleFreq.alt);
|
||||||
|
} else if (gtype.contentEquals("hom")) {
|
||||||
|
System.out.println(alleleFreq.ref + "" + alleleFreq.ref);
|
||||||
|
} else {
|
||||||
|
System.out.println(alleleFreq.alt + "" + alleleFreq.alt);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
return "";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue