- A lot of code cleaned up; separated metrics code from AlleleFrequencyMetricsWalker into AlleleMetrics and eliminated the former class. AFMW (aside from being a name so long that it warrants an acronym) can now be implemented by passing an option to AlleleFreqeuncyWalker that logs metrics to a file.

- AlleleMetrics and AlleleMetricrsWalker are now ready to take a list of clasess that implement the AllelicVariant interface
- Switched a genome location in AlleleFrequencyEstimate from String to GenomeLoc which makes way more sense.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@280 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
andrewk 2009-04-03 02:09:10 +00:00
parent c6ab60ee04
commit e3ac0cb500
4 changed files with 101 additions and 81 deletions

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import org.broadinstitute.sting.playground.utils.AlleleMetrics;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.*;
import org.apache.log4j.Logger;
@ -15,11 +16,13 @@ import java.util.Arrays;
import java.util.Random;
import java.io.PrintStream;
public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate, String>
public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate, String>// implements AllelicVariant
{
@Argument public int N;
@Argument(required=false,defaultValue="0") public int DOWNSAMPLE;
@Argument public String GFF_OUTPUT_FILE;
@Argument(shortName="met", doc="Turns on logging of metrics on the fly with AlleleFrequency calculation") public boolean LOG_METRICS;
@Argument(required=false, defaultValue="", doc="Name of file where metrics will output") public String METRICS_OUTPUT_FILE;
protected static Logger logger = Logger.getLogger(AlleleFrequencyWalker.class);
@ -30,7 +33,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
{
// Convert context data into bases and 4-base quals
String bases = getBases(context);
double quals[][] = getOneBaseQuals(context);
double quals[][] = getQuals(context);
/*
// DEBUG: print the data for a read
@ -87,7 +90,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
}
assert(altnum != -1);
AlleleFrequencyEstimate alleleFreq = AlleleFrequencyEstimator(context.getLocation().toString(), N, bases.getBytes(), quals, refnum, altnum, bases.length());
AlleleFrequencyEstimate alleleFreq = AlleleFrequencyEstimator(context.getLocation(), N, bases.getBytes(), quals, refnum, altnum, bases.length());
alleleFreq.notes = String.format("A:%d C:%d G:%d T:%d",
base_counts[nuc2num['A']],
@ -108,6 +111,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
logger.debug(String.format(" => result is %s", alleleFreq));
if (LOG_METRICS) metrics.nextPosition(alleleFreq, rodData);
return alleleFreq;
}
@ -128,7 +133,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
return new String(bases);
}
static public double[][] getOneBaseQuals (LocusContext context)
static public double[][] getQuals (LocusContext context)
{
int numReads = context.getReads().size(); //numReads();
double[][] quals = new double[numReads][4];
@ -155,13 +160,13 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
}else{
assert (SQ_field instanceof byte[]);
byte[] hex_quals = (byte[]) SQ_field;
System.out.printf("SQ field (hex): %s\n", bytesToHexString(hex_quals));
System.out.printf("SAM record: %s\n", read.format());
//System.out.printf("SQ field (hex): %s\n", bytesToHexString(hex_quals));
//System.out.printf("SAM record: %s\n", read.format());
int hex_qual = hex_quals[offset];
int called2num = hex_qual & 0x3;
double qual2 = (double)(hex_qual >> 2) / 100.0;
System.out.printf("2ND %x %d %f\n", hex_qual, called2num, qual2);
//System.out.printf("2ND %x %d %f\n", hex_qual, called2num, qual2);
quals[i][called2num] = qual2;
//
@ -191,7 +196,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
return (char) ((value < 10) ? ('0' + value) : ('A' + value - 10));
}
public AlleleFrequencyEstimate AlleleFrequencyEstimator(String location, int N, byte[] bases, double[][] quals, int refnum, int altnum, int depth)
public AlleleFrequencyEstimate AlleleFrequencyEstimator(GenomeLoc location, int N, byte[] bases, double[][] quals, int refnum, int altnum, int depth)
{
// q = hypothetical %nonref
@ -251,7 +256,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
Arrays.sort(bestMixtures);
// Calculate Lod of the mixture versus other possible
// Answers how confident are we in the best mixture versus the next best mixture
// Answers how confident are we in the best mixture versus the nextPosition best mixture
double lodBestVsNextBest = bestMixtures[0].posterior - bestMixtures[1].posterior;
AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(location,
@ -375,48 +380,42 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
return result;
}
private String confident_ref_interval_start = "";
private String confident_ref_interval_contig = "";
private long confident_ref_interval_start = 0;
private double confident_ref_interval_LOD_sum = 0;
private double confident_ref_interval_length = 0;
private int last_position_considered = -1;
private long last_position_considered = -1;
private boolean inside_confident_ref_interval = false;
AlleleMetrics metrics;
public String reduceInit()
{
confident_ref_interval_start = "";
confident_ref_interval_contig = "";
confident_ref_interval_start = 0;
confident_ref_interval_LOD_sum = 0;
confident_ref_interval_length = 0;
last_position_considered = -1;
inside_confident_ref_interval = false;
return "";
if (LOG_METRICS) metrics = new AlleleMetrics("SNTH");//METRICS_OUTPUT_FILE);
return "";
}
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
{
// Print RESULT data for confident calls
String[] tokens;
tokens = alleleFreq.location.split(":");
int current_offset = Integer.parseInt(tokens[1]);
long current_offset = alleleFreq.location.getStart(); //Integer.parseInt(tokens[1]);
if (inside_confident_ref_interval &&
((alleleFreq.lodVsRef > -5.0) || (current_offset != last_position_considered + 1)) )
{
// No longer hom-ref, so output a ref line.
tokens = confident_ref_interval_start.split(":");
String contig = tokens[0];
int start = Integer.parseInt(tokens[1]);
tokens = alleleFreq.location.split(":");
int end = last_position_considered;
double lod = confident_ref_interval_LOD_sum / confident_ref_interval_length;
output.format("%s\tCALLER\tREFERENCE\t%d\t%d\t%f\t.\t.\tLENGTH %d\n",
contig,
start,
end,
confident_ref_interval_contig,
confident_ref_interval_start,
last_position_considered,
lod,
(int)(confident_ref_interval_length));
@ -435,7 +434,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
else if ((!inside_confident_ref_interval) && (alleleFreq.lodVsRef <= -5.0))
{
// We moved into a hom-ref region so start a new interval.
confident_ref_interval_start = alleleFreq.location;
confident_ref_interval_contig = alleleFreq.location.getContig();
confident_ref_interval_start = alleleFreq.location.getStart();
confident_ref_interval_LOD_sum = alleleFreq.lodVsRef;
confident_ref_interval_length = 1;
inside_confident_ref_interval = true;
@ -444,6 +444,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
last_position_considered = current_offset;
if (alleleFreq.lodVsRef >= 5) { this.output.print(alleleFreq.asGFFString()); }
if (LOG_METRICS) metrics.printMetricsAtLocusIntervals(1000);
return "";
}
@ -492,17 +493,13 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
if (inside_confident_ref_interval)
{
// if we have a confident reference interval still hanging open, close it.
String tokens[] = confident_ref_interval_start.split(":");
String contig = tokens[0];
int start = Integer.parseInt(tokens[1]);
int end = last_position_considered;
double lod = confident_ref_interval_LOD_sum / confident_ref_interval_length;
output.format("%s\tCALLER\tREFERENCE\t%d\t%d\t%f\t.\t.\tLENGTH %d\n",
contig,
start,
end,
confident_ref_interval_contig,
confident_ref_interval_start,
last_position_considered,
lod,
(int)(confident_ref_interval_length));
@ -522,6 +519,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
e.printStackTrace();
System.exit(-1);
}
if (LOG_METRICS) metrics.printMetrics();
}
static void print_base_qual_matrix(double [][]quals) {
@ -569,7 +568,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
{0.001/3.0, 0.999, 0.001/3.0, 0.001/3.0},
{0.001/3.0, 0.999, 0.001/3.0, 0.001/3.0}};
AlleleFrequencyWalker w = new AlleleFrequencyWalker();
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator(null, N, het_bases, het_quals, 0, 1, 20);
System.out.print(String.format("50%% Het : %s %c %c %f %f %f %d %s\n",
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
}
@ -686,7 +685,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
int N = 10;
AlleleFrequencyWalker w = new AlleleFrequencyWalker();
w.N = 10;
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator(null, N, het_bases, het_quals, 0, 1, 20);
System.out.print(String.format("10%% Het : %s %c %c %f %f %f %d %s\n",
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
}

View File

@ -0,0 +1,27 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: andrewk
* Date: Apr 2, 2009
* Time: 9:01:44 PM
* To change this template use File | Settings | File Templates.
*/
public class AlleleMetricsWalker {
// Class that will walk over various metrics in a reference ordered way
// This class walks over the genome in reference order and calls AlleleMetrics on each class
// Hapmap and dbSNP tracks are taken from the command line
// At first pass, this will at least be able to walk over a GFF file and compare to the hapmap and dbsnp
// tracks specified on the command line and handed in via the LocusContext
public void map(List<AllelicVariant> avdata) {
}
}

View File

@ -1,10 +1,11 @@
package org.broadinstitute.sting.playground.utils;
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
public class AlleleFrequencyEstimate {
//AlleleFrequencyEstimate();
public String location;
public GenomeLoc location;
public char ref;
public char alt;
public int N;
@ -15,7 +16,9 @@ public class AlleleFrequencyEstimate {
public int depth;
public String notes;
public AlleleFrequencyEstimate(String location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, int depth)
GenomeLoc l;
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, int depth)
{
this.location = location;
this.ref = ref;
@ -31,12 +34,10 @@ public class AlleleFrequencyEstimate {
public String asGFFString()
{
String[] tokens;
tokens = location.split(":");
return String.format("%s\tCALLER\tVARIANT\t%s\t%s\t%f\t.\t.\tREF %c\t;\tALT %c\t;\tFREQ %f\n",
tokens[0],
tokens[1],
tokens[1],
location.getContig(),
location.getStart(),
location.getStart(),
lodVsRef,
ref,
alt,

View File

@ -1,25 +1,21 @@
package org.broadinstitute.sting.playground.gatk.walkers;
package org.broadinstitute.sting.playground.utils;
import org.broadinstitute.sting.gatk.refdata.rodGFF;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.rodGFF;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import java.util.List;
import java.io.PrintStream;
/**
* Created by IntelliJ IDEA.
* User: andrewk
* Date: Mar 18, 2009
* Time: 5:28:58 PM
* Date: Apr 1, 2009
* Time: 5:53:21 PM
* To change this template use File | Settings | File Templates.
*/
public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEstimate, String>
{
public class AlleleMetrics {
long dbsnp_hits=0;
long num_variants=0;
@ -31,12 +27,24 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
long hapmap_refvar_correct = 0;
long hapmap_refvar_incorrect = 0;
AlleleFrequencyWalker caller;
protected PrintStream out;
public AlleleFrequencyEstimate map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context)
{
AlleleFrequencyEstimate alleleFreq = caller.map(rodData, ref, context);
public AlleleMetrics(String MetricsOutputFile) {
try
{
/*if ( MetricsOutputFile.equals("-") )
this.out = out;
else*/
this.out = new PrintStream(MetricsOutputFile);
}
catch (Exception e)
{
e.printStackTrace();
System.exit(-1);
}
}
public void nextPosition(AlleleFrequencyEstimate alleleFreq, List<ReferenceOrderedDatum> rodData) {
num_loci_total += 1;
boolean is_dbSNP_SNP = false;
@ -47,7 +55,7 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
{
if ( datum != null )
{
if ( datum instanceof rodDbSNP )
if ( datum instanceof rodDbSNP)
{
rodDbSNP dbsnp = (rodDbSNP)datum;
if (dbsnp.isSNP()) is_dbSNP_SNP = true;
@ -64,9 +72,9 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) { num_loci_confident += 1; }
if (alleleFreq.qstar > 0.0 && alleleFreq.lodVsRef >= LOD_cutoff)
{
{
// Confident variant.
num_variants += 1;
if (is_dbSNP_SNP)
@ -105,7 +113,8 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
hapmap_genotype_correct++;
}else{
hapmap_genotype_incorrect++;
System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
//System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
System.out.printf(" INCORRECT GENOTYPE");
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
}
}
@ -118,16 +127,14 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
boolean called_var = alleleFreq.qstar != 0.0;
if (hapmap_q != -1 && hapmap_var != called_var) {
hapmap_refvar_incorrect++;
System.out.printf(" INCORRECT REFVAR CALL");
}else{
hapmap_refvar_correct++;
System.out.printf(" INCORRECT REFVAR CALL Bases: %s\n", AlleleFrequencyWalker.getBases(context));
}
}
out.print("\n");
}
return alleleFreq;
}
public void printMetrics()
@ -159,23 +166,9 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
out.println();
}
public void onTraversalDone(String result)
{
printMetrics();
public void printMetricsAtLocusIntervals(int loci_interval) {
if (num_loci_total % loci_interval == 0) printMetrics();
}
public String reduceInit()
{
caller = new AlleleFrequencyWalker();
return "";
}
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
{
if ((alleleFreq.lodVsRef >= 5) || (alleleFreq.lodVsRef <= -5)) { System.out.print(alleleFreq.asGFFString()); }
if (this.num_loci_total % 1000 == 0) { printMetrics(); }
return "null";
}
}