From 313a6d0fb5261ff2968b52ed0d1966bae9a30355 Mon Sep 17 00:00:00 2001 From: jmaguire Date: Tue, 12 May 2009 15:11:42 +0000 Subject: [PATCH] lots of changes to facilitate calling indels and 1kG git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@666 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/SingleSampleGenotyper.java | 130 ++++++++++++++++-- 1 file changed, 122 insertions(+), 8 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java index 4611faa39..687d8d40b 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java @@ -5,7 +5,9 @@ import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.playground.utils.*; +import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods.IndelCall; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.BasicPileup; import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; @@ -18,26 +20,32 @@ import java.util.*; // Draft single sample genotyper // j.maguire 3-7-2009 -public class SingleSampleGenotyper extends LocusWalker { +public class SingleSampleGenotyper extends LocusWalker +{ @Argument(fullName="metrics", shortName="met", doc="metrics", required=false) public String metricsFileName = "/dev/null"; @Argument(fullName="metInterval", shortName="mi", doc="metInterval", required=false) public Integer metricsInterval = 50000; @Argument(fullName="printMetrics", shortName="printMetrics", doc="printMetrics", required=false) public Boolean printMetrics = true; @Argument(fullName="lodThreshold", shortName="lod", doc="lodThreshold", required=false) public Double lodThreshold = 5.0; @Argument(fullName="fourBaseMode", shortName="fb", doc="fourBaseMode", required=false) public Boolean fourBaseMode = false; @Argument(fullName="retest", shortName="re", doc="retest", required=false) public Boolean retest = false; + @Argument(fullName="call_indels", shortName="call_indels", doc="Call Indels", required=false) public Boolean call_indels = false; @Argument(fullName="qHom", shortName="qHom", doc="qHom", required=false) public Double qHom = 0.04; @Argument(fullName="qHet", shortName="qHet", doc="qHet", required=false) public Double qHet = 0.49; @Argument(fullName="qHomNonRef", shortName="qHomNonRef", doc="qHomNonRef", required=false) public Double qHomNonRef = 0.97; public AlleleMetrics metrics; + + public String sample_name; public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) { return true; } public boolean requiresReads() { return true; } - public void initialize() { metrics = new AlleleMetrics(metricsFileName, lodThreshold); } + public void initialize() { metrics = new AlleleMetrics(metricsFileName, lodThreshold); sample_name = null; } public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) { String rodString = getRodString(tracker); + if (ref == 'N') { return null; } + /* AlleleFrequencyEstimate freq; if (fourBaseMode) { @@ -57,8 +65,21 @@ public class SingleSampleGenotyper extends LocusWalker reads = context.getReads(); List offsets = context.getOffsets(); ref = Character.toUpperCase(ref); + + // Handle indels. + if (call_indels) + { + String[] indels = BasicPileup.indelPileup(reads, offsets); + IndelCall indel_call = GenotypeLikelihoods.callIndel(indels); + if (indel_call != null) + { + if (! indel_call.type.equals("ref")) + { + System.out.printf("INDEL %s %s\n", context.getLocation(), indel_call); + } + } + } + // Handle single-base polymorphisms. GenotypeLikelihoods G = new GenotypeLikelihoods(); for ( int i = 0; i < reads.size(); i++ ) { @@ -272,7 +309,7 @@ public class SingleSampleGenotyper extends LocusWalker -5.0) || (current_offset != last_position_considered + 1)) ) + { + // No longer hom-ref, so output a ref line. + double lod = confident_ref_interval_LOD_sum / confident_ref_interval_length; + + out.format("%s\tCALLER\tREFERENCE\t%d\t%d\t%f\t.\t.\tLENGTH %d\n", + confident_ref_interval_contig, + confident_ref_interval_start, + last_position_considered, + lod, + (int)(confident_ref_interval_length)); + + inside_confident_ref_interval = false; + } + else if (inside_confident_ref_interval && (alleleFreq.lodVsRef <= -5.0)) + { + // Still hom-ref so increment the counters. + confident_ref_interval_LOD_sum += alleleFreq.lodVsRef; + confident_ref_interval_length += 1; + } + else if ((!inside_confident_ref_interval) && (alleleFreq.lodVsRef > -5.0)) + { + // do nothing. + } + else if ((!inside_confident_ref_interval) && (alleleFreq.lodVsRef <= -5.0)) + { + // We moved into a hom-ref region so start a new interval. + confident_ref_interval_contig = alleleFreq.location.getContig(); + confident_ref_interval_start = alleleFreq.location.getStart(); + confident_ref_interval_LOD_sum = alleleFreq.lodVsRef; + confident_ref_interval_length = 1; + inside_confident_ref_interval = true; + } + + last_position_considered = current_offset; + + if (alleleFreq.lodVsRef >= 5) { + out.print(alleleFreq.asGFFString()); + + /* + String gtype = genotypeTypeString(alleleFreq.qstar, alleleFreq.N); + System.out.print("DEBUG " + gtype + " "); + if (gtype.contentEquals("het")) { + System.out.println(alleleFreq.ref + "" + alleleFreq.alt); + } else if (gtype.contentEquals("hom")) { + System.out.println(alleleFreq.ref + "" + alleleFreq.ref); + } else { + System.out.println(alleleFreq.alt + "" + alleleFreq.alt); + } + */ + } + + return ""; + } }