- A lot of code cleaned up; separated metrics code from AlleleFrequencyMetricsWalker into AlleleMetrics and eliminated the former class. AFMW (aside from being a name so long that it warrants an acronym) can now be implemented by passing an option to AlleleFrequencyWalker that logs metrics to a file.
- AlleleMetrics and AlleleMetricsWalker are now ready to take a list of classes that implement the AllelicVariant interface - Switched a genome location in AlleleFrequencyEstimate from String to GenomeLoc which makes way more sense. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@280 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c6ab60ee04
commit
e3ac0cb500
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleMetrics;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
@ -15,11 +16,13 @@ import java.util.Arrays;
|
|||
import java.util.Random;
|
||||
import java.io.PrintStream;
|
||||
|
||||
public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||
public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate, String>// implements AllelicVariant
|
||||
{
|
||||
@Argument public int N;
|
||||
@Argument(required=false,defaultValue="0") public int DOWNSAMPLE;
|
||||
@Argument public String GFF_OUTPUT_FILE;
|
||||
@Argument(shortName="met", doc="Turns on logging of metrics on the fly with AlleleFrequency calculation") public boolean LOG_METRICS;
|
||||
@Argument(required=false, defaultValue="", doc="Name of file where metrics will output") public String METRICS_OUTPUT_FILE;
|
||||
|
||||
protected static Logger logger = Logger.getLogger(AlleleFrequencyWalker.class);
|
||||
|
||||
|
|
@ -30,7 +33,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
{
|
||||
// Convert context data into bases and 4-base quals
|
||||
String bases = getBases(context);
|
||||
double quals[][] = getOneBaseQuals(context);
|
||||
double quals[][] = getQuals(context);
|
||||
|
||||
/*
|
||||
// DEBUG: print the data for a read
|
||||
|
|
@ -87,7 +90,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
}
|
||||
assert(altnum != -1);
|
||||
|
||||
AlleleFrequencyEstimate alleleFreq = AlleleFrequencyEstimator(context.getLocation().toString(), N, bases.getBytes(), quals, refnum, altnum, bases.length());
|
||||
AlleleFrequencyEstimate alleleFreq = AlleleFrequencyEstimator(context.getLocation(), N, bases.getBytes(), quals, refnum, altnum, bases.length());
|
||||
|
||||
alleleFreq.notes = String.format("A:%d C:%d G:%d T:%d",
|
||||
base_counts[nuc2num['A']],
|
||||
|
|
@ -108,6 +111,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
|
||||
logger.debug(String.format(" => result is %s", alleleFreq));
|
||||
|
||||
if (LOG_METRICS) metrics.nextPosition(alleleFreq, rodData);
|
||||
|
||||
return alleleFreq;
|
||||
}
|
||||
|
||||
|
|
@ -128,7 +133,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
return new String(bases);
|
||||
}
|
||||
|
||||
static public double[][] getOneBaseQuals (LocusContext context)
|
||||
static public double[][] getQuals (LocusContext context)
|
||||
{
|
||||
int numReads = context.getReads().size(); //numReads();
|
||||
double[][] quals = new double[numReads][4];
|
||||
|
|
@ -155,13 +160,13 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
}else{
|
||||
assert (SQ_field instanceof byte[]);
|
||||
byte[] hex_quals = (byte[]) SQ_field;
|
||||
System.out.printf("SQ field (hex): %s\n", bytesToHexString(hex_quals));
|
||||
System.out.printf("SAM record: %s\n", read.format());
|
||||
//System.out.printf("SQ field (hex): %s\n", bytesToHexString(hex_quals));
|
||||
//System.out.printf("SAM record: %s\n", read.format());
|
||||
|
||||
int hex_qual = hex_quals[offset];
|
||||
int called2num = hex_qual & 0x3;
|
||||
double qual2 = (double)(hex_qual >> 2) / 100.0;
|
||||
System.out.printf("2ND %x %d %f\n", hex_qual, called2num, qual2);
|
||||
//System.out.printf("2ND %x %d %f\n", hex_qual, called2num, qual2);
|
||||
quals[i][called2num] = qual2;
|
||||
|
||||
//
|
||||
|
|
@ -191,7 +196,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
return (char) ((value < 10) ? ('0' + value) : ('A' + value - 10));
|
||||
}
|
||||
|
||||
public AlleleFrequencyEstimate AlleleFrequencyEstimator(String location, int N, byte[] bases, double[][] quals, int refnum, int altnum, int depth)
|
||||
public AlleleFrequencyEstimate AlleleFrequencyEstimator(GenomeLoc location, int N, byte[] bases, double[][] quals, int refnum, int altnum, int depth)
|
||||
{
|
||||
|
||||
// q = hypothetical %nonref
|
||||
|
|
@ -251,7 +256,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
Arrays.sort(bestMixtures);
|
||||
|
||||
// Calculate Lod of the mixture versus other possible
|
||||
// Answers how confident are we in the best mixture versus the next best mixture
|
||||
// Answers how confident are we in the best mixture versus the nextPosition best mixture
|
||||
double lodBestVsNextBest = bestMixtures[0].posterior - bestMixtures[1].posterior;
|
||||
|
||||
AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(location,
|
||||
|
|
@ -375,48 +380,42 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
return result;
|
||||
}
|
||||
|
||||
private String confident_ref_interval_start = "";
|
||||
private String confident_ref_interval_contig = "";
|
||||
private long confident_ref_interval_start = 0;
|
||||
private double confident_ref_interval_LOD_sum = 0;
|
||||
private double confident_ref_interval_length = 0;
|
||||
private int last_position_considered = -1;
|
||||
private long last_position_considered = -1;
|
||||
private boolean inside_confident_ref_interval = false;
|
||||
AlleleMetrics metrics;
|
||||
|
||||
public String reduceInit()
|
||||
{
|
||||
confident_ref_interval_start = "";
|
||||
confident_ref_interval_contig = "";
|
||||
confident_ref_interval_start = 0;
|
||||
confident_ref_interval_LOD_sum = 0;
|
||||
confident_ref_interval_length = 0;
|
||||
last_position_considered = -1;
|
||||
inside_confident_ref_interval = false;
|
||||
return "";
|
||||
if (LOG_METRICS) metrics = new AlleleMetrics("SNTH");//METRICS_OUTPUT_FILE);
|
||||
return "";
|
||||
}
|
||||
|
||||
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
|
||||
{
|
||||
// Print RESULT data for confident calls
|
||||
|
||||
String[] tokens;
|
||||
tokens = alleleFreq.location.split(":");
|
||||
int current_offset = Integer.parseInt(tokens[1]);
|
||||
long current_offset = alleleFreq.location.getStart(); //Integer.parseInt(tokens[1]);
|
||||
|
||||
if (inside_confident_ref_interval &&
|
||||
((alleleFreq.lodVsRef > -5.0) || (current_offset != last_position_considered + 1)) )
|
||||
{
|
||||
// No longer hom-ref, so output a ref line.
|
||||
tokens = confident_ref_interval_start.split(":");
|
||||
|
||||
String contig = tokens[0];
|
||||
int start = Integer.parseInt(tokens[1]);
|
||||
|
||||
tokens = alleleFreq.location.split(":");
|
||||
int end = last_position_considered;
|
||||
|
||||
double lod = confident_ref_interval_LOD_sum / confident_ref_interval_length;
|
||||
|
||||
output.format("%s\tCALLER\tREFERENCE\t%d\t%d\t%f\t.\t.\tLENGTH %d\n",
|
||||
contig,
|
||||
start,
|
||||
end,
|
||||
confident_ref_interval_contig,
|
||||
confident_ref_interval_start,
|
||||
last_position_considered,
|
||||
lod,
|
||||
(int)(confident_ref_interval_length));
|
||||
|
||||
|
|
@ -435,7 +434,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
else if ((!inside_confident_ref_interval) && (alleleFreq.lodVsRef <= -5.0))
|
||||
{
|
||||
// We moved into a hom-ref region so start a new interval.
|
||||
confident_ref_interval_start = alleleFreq.location;
|
||||
confident_ref_interval_contig = alleleFreq.location.getContig();
|
||||
confident_ref_interval_start = alleleFreq.location.getStart();
|
||||
confident_ref_interval_LOD_sum = alleleFreq.lodVsRef;
|
||||
confident_ref_interval_length = 1;
|
||||
inside_confident_ref_interval = true;
|
||||
|
|
@ -444,6 +444,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
last_position_considered = current_offset;
|
||||
|
||||
if (alleleFreq.lodVsRef >= 5) { this.output.print(alleleFreq.asGFFString()); }
|
||||
if (LOG_METRICS) metrics.printMetricsAtLocusIntervals(1000);
|
||||
return "";
|
||||
}
|
||||
|
||||
|
|
@ -492,17 +493,13 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
if (inside_confident_ref_interval)
|
||||
{
|
||||
// if we have a confident reference interval still hanging open, close it.
|
||||
String tokens[] = confident_ref_interval_start.split(":");
|
||||
String contig = tokens[0];
|
||||
int start = Integer.parseInt(tokens[1]);
|
||||
int end = last_position_considered;
|
||||
|
||||
double lod = confident_ref_interval_LOD_sum / confident_ref_interval_length;
|
||||
|
||||
output.format("%s\tCALLER\tREFERENCE\t%d\t%d\t%f\t.\t.\tLENGTH %d\n",
|
||||
contig,
|
||||
start,
|
||||
end,
|
||||
confident_ref_interval_contig,
|
||||
confident_ref_interval_start,
|
||||
last_position_considered,
|
||||
lod,
|
||||
(int)(confident_ref_interval_length));
|
||||
|
||||
|
|
@ -522,6 +519,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
e.printStackTrace();
|
||||
System.exit(-1);
|
||||
}
|
||||
|
||||
if (LOG_METRICS) metrics.printMetrics();
|
||||
}
|
||||
|
||||
static void print_base_qual_matrix(double [][]quals) {
|
||||
|
|
@ -569,7 +568,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
{0.001/3.0, 0.999, 0.001/3.0, 0.001/3.0},
|
||||
{0.001/3.0, 0.999, 0.001/3.0, 0.001/3.0}};
|
||||
AlleleFrequencyWalker w = new AlleleFrequencyWalker();
|
||||
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
|
||||
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator(null, N, het_bases, het_quals, 0, 1, 20);
|
||||
System.out.print(String.format("50%% Het : %s %c %c %f %f %f %d %s\n",
|
||||
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
|
||||
}
|
||||
|
|
@ -686,7 +685,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
int N = 10;
|
||||
AlleleFrequencyWalker w = new AlleleFrequencyWalker();
|
||||
w.N = 10;
|
||||
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
|
||||
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator(null, N, het_bases, het_quals, 0, 1, 20);
|
||||
System.out.print(String.format("10%% Het : %s %c %c %f %f %f %d %s\n",
|
||||
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,27 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: andrewk
|
||||
* Date: Apr 2, 2009
|
||||
* Time: 9:01:44 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class AlleleMetricsWalker {
|
||||
// Class that will walk over various metrics in a reference ordered way
|
||||
// This class walks over the genome in reference order and calls AlleleMetrics on each class
|
||||
// Hapmap and dbSNP tracks are taken from the command line
|
||||
// At first pass, this will at least be able to walk over a GFF file and compare to the hapmap and dbsnp
|
||||
// tracks specified on the command line and handed in via the LocusContext
|
||||
|
||||
|
||||
// map: accepts a list of AllelicVariant records. The body is empty in this revision, so a call
// currently does nothing; the comments above describe the intended (not yet implemented) behavior.
public void map(List<AllelicVariant> avdata) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,10 +1,11 @@
|
|||
package org.broadinstitute.sting.playground.utils;
|
||||
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
public class AlleleFrequencyEstimate {
|
||||
//AlleleFrequencyEstimate();
|
||||
public String location;
|
||||
public GenomeLoc location;
|
||||
public char ref;
|
||||
public char alt;
|
||||
public int N;
|
||||
|
|
@ -15,7 +16,9 @@ public class AlleleFrequencyEstimate {
|
|||
public int depth;
|
||||
public String notes;
|
||||
|
||||
public AlleleFrequencyEstimate(String location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, int depth)
|
||||
GenomeLoc l;
|
||||
|
||||
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, int depth)
|
||||
{
|
||||
this.location = location;
|
||||
this.ref = ref;
|
||||
|
|
@ -31,12 +34,10 @@ public class AlleleFrequencyEstimate {
|
|||
|
||||
public String asGFFString()
|
||||
{
|
||||
String[] tokens;
|
||||
tokens = location.split(":");
|
||||
return String.format("%s\tCALLER\tVARIANT\t%s\t%s\t%f\t.\t.\tREF %c\t;\tALT %c\t;\tFREQ %f\n",
|
||||
tokens[0],
|
||||
tokens[1],
|
||||
tokens[1],
|
||||
location.getContig(),
|
||||
location.getStart(),
|
||||
location.getStart(),
|
||||
lodVsRef,
|
||||
ref,
|
||||
alt,
|
||||
|
|
|
|||
|
|
@ -1,25 +1,21 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
package org.broadinstitute.sting.playground.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||
|
||||
import java.util.List;
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: andrewk
|
||||
* Date: Mar 18, 2009
|
||||
* Time: 5:28:58 PM
|
||||
* Date: Apr 1, 2009
|
||||
* Time: 5:53:21 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEstimate, String>
|
||||
{
|
||||
public class AlleleMetrics {
|
||||
|
||||
long dbsnp_hits=0;
|
||||
long num_variants=0;
|
||||
|
|
@ -31,12 +27,24 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
|||
long hapmap_refvar_correct = 0;
|
||||
long hapmap_refvar_incorrect = 0;
|
||||
|
||||
AlleleFrequencyWalker caller;
|
||||
protected PrintStream out;
|
||||
|
||||
public AlleleFrequencyEstimate map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context)
|
||||
{
|
||||
AlleleFrequencyEstimate alleleFreq = caller.map(rodData, ref, context);
|
||||
public AlleleMetrics(String MetricsOutputFile) {
|
||||
try
|
||||
{
|
||||
/*if ( MetricsOutputFile.equals("-") )
|
||||
this.out = out;
|
||||
else*/
|
||||
this.out = new PrintStream(MetricsOutputFile);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
System.exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
public void nextPosition(AlleleFrequencyEstimate alleleFreq, List<ReferenceOrderedDatum> rodData) {
|
||||
num_loci_total += 1;
|
||||
|
||||
boolean is_dbSNP_SNP = false;
|
||||
|
|
@ -47,7 +55,7 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
|||
{
|
||||
if ( datum != null )
|
||||
{
|
||||
if ( datum instanceof rodDbSNP )
|
||||
if ( datum instanceof rodDbSNP)
|
||||
{
|
||||
rodDbSNP dbsnp = (rodDbSNP)datum;
|
||||
if (dbsnp.isSNP()) is_dbSNP_SNP = true;
|
||||
|
|
@ -64,9 +72,9 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
|||
if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) { num_loci_confident += 1; }
|
||||
|
||||
if (alleleFreq.qstar > 0.0 && alleleFreq.lodVsRef >= LOD_cutoff)
|
||||
{
|
||||
{
|
||||
// Confident variant.
|
||||
|
||||
|
||||
num_variants += 1;
|
||||
|
||||
if (is_dbSNP_SNP)
|
||||
|
|
@ -105,7 +113,8 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
|||
hapmap_genotype_correct++;
|
||||
}else{
|
||||
hapmap_genotype_incorrect++;
|
||||
System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
|
||||
//System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
|
||||
System.out.printf(" INCORRECT GENOTYPE");
|
||||
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
|
||||
}
|
||||
}
|
||||
|
|
@ -118,16 +127,14 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
|||
boolean called_var = alleleFreq.qstar != 0.0;
|
||||
if (hapmap_q != -1 && hapmap_var != called_var) {
|
||||
hapmap_refvar_incorrect++;
|
||||
System.out.printf(" INCORRECT REFVAR CALL");
|
||||
}else{
|
||||
hapmap_refvar_correct++;
|
||||
System.out.printf(" INCORRECT REFVAR CALL Bases: %s\n", AlleleFrequencyWalker.getBases(context));
|
||||
}
|
||||
}
|
||||
|
||||
out.print("\n");
|
||||
}
|
||||
|
||||
return alleleFreq;
|
||||
}
|
||||
|
||||
public void printMetrics()
|
||||
|
|
@ -159,23 +166,9 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
|||
out.println();
|
||||
}
|
||||
|
||||
public void onTraversalDone(String result)
|
||||
{
|
||||
printMetrics();
|
||||
public void printMetricsAtLocusIntervals(int loci_interval) {
|
||||
if (num_loci_total % loci_interval == 0) printMetrics();
|
||||
}
|
||||
|
||||
public String reduceInit()
|
||||
{
|
||||
caller = new AlleleFrequencyWalker();
|
||||
return "";
|
||||
}
|
||||
|
||||
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
|
||||
{
|
||||
if ((alleleFreq.lodVsRef >= 5) || (alleleFreq.lodVsRef <= -5)) { System.out.print(alleleFreq.asGFFString()); }
|
||||
if (this.num_loci_total % 1000 == 0) { printMetrics(); }
|
||||
return "null";
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue