- A lot of code cleaned up; separated metrics code from AlleleFrequencyMetricsWalker into AlleleMetrics and eliminated the former class. AFMW (aside from being a name so long that it warrants an acronym) can now be implemented by passing an option to AlleleFreqeuncyWalker that logs metrics to a file.
- AlleleMetrics and AlleleMetricrsWalker are now ready to take a list of clasess that implement the AllelicVariant interface - Switched a genome location in AlleleFrequencyEstimate from String to GenomeLoc which makes way more sense. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@280 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c6ab60ee04
commit
e3ac0cb500
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||||
|
import org.broadinstitute.sting.playground.utils.AlleleMetrics;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
@ -15,11 +16,13 @@ import java.util.Arrays;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
||||||
public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate, String>
|
public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate, String>// implements AllelicVariant
|
||||||
{
|
{
|
||||||
@Argument public int N;
|
@Argument public int N;
|
||||||
@Argument(required=false,defaultValue="0") public int DOWNSAMPLE;
|
@Argument(required=false,defaultValue="0") public int DOWNSAMPLE;
|
||||||
@Argument public String GFF_OUTPUT_FILE;
|
@Argument public String GFF_OUTPUT_FILE;
|
||||||
|
@Argument(shortName="met", doc="Turns on logging of metrics on the fly with AlleleFrequency calculation") public boolean LOG_METRICS;
|
||||||
|
@Argument(required=false, defaultValue="", doc="Name of file where metrics will output") public String METRICS_OUTPUT_FILE;
|
||||||
|
|
||||||
protected static Logger logger = Logger.getLogger(AlleleFrequencyWalker.class);
|
protected static Logger logger = Logger.getLogger(AlleleFrequencyWalker.class);
|
||||||
|
|
||||||
|
|
@ -30,7 +33,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
{
|
{
|
||||||
// Convert context data into bases and 4-base quals
|
// Convert context data into bases and 4-base quals
|
||||||
String bases = getBases(context);
|
String bases = getBases(context);
|
||||||
double quals[][] = getOneBaseQuals(context);
|
double quals[][] = getQuals(context);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// DEBUG: print the data for a read
|
// DEBUG: print the data for a read
|
||||||
|
|
@ -87,7 +90,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
}
|
}
|
||||||
assert(altnum != -1);
|
assert(altnum != -1);
|
||||||
|
|
||||||
AlleleFrequencyEstimate alleleFreq = AlleleFrequencyEstimator(context.getLocation().toString(), N, bases.getBytes(), quals, refnum, altnum, bases.length());
|
AlleleFrequencyEstimate alleleFreq = AlleleFrequencyEstimator(context.getLocation(), N, bases.getBytes(), quals, refnum, altnum, bases.length());
|
||||||
|
|
||||||
alleleFreq.notes = String.format("A:%d C:%d G:%d T:%d",
|
alleleFreq.notes = String.format("A:%d C:%d G:%d T:%d",
|
||||||
base_counts[nuc2num['A']],
|
base_counts[nuc2num['A']],
|
||||||
|
|
@ -108,6 +111,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
|
|
||||||
logger.debug(String.format(" => result is %s", alleleFreq));
|
logger.debug(String.format(" => result is %s", alleleFreq));
|
||||||
|
|
||||||
|
if (LOG_METRICS) metrics.nextPosition(alleleFreq, rodData);
|
||||||
|
|
||||||
return alleleFreq;
|
return alleleFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -128,7 +133,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
return new String(bases);
|
return new String(bases);
|
||||||
}
|
}
|
||||||
|
|
||||||
static public double[][] getOneBaseQuals (LocusContext context)
|
static public double[][] getQuals (LocusContext context)
|
||||||
{
|
{
|
||||||
int numReads = context.getReads().size(); //numReads();
|
int numReads = context.getReads().size(); //numReads();
|
||||||
double[][] quals = new double[numReads][4];
|
double[][] quals = new double[numReads][4];
|
||||||
|
|
@ -155,13 +160,13 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
}else{
|
}else{
|
||||||
assert (SQ_field instanceof byte[]);
|
assert (SQ_field instanceof byte[]);
|
||||||
byte[] hex_quals = (byte[]) SQ_field;
|
byte[] hex_quals = (byte[]) SQ_field;
|
||||||
System.out.printf("SQ field (hex): %s\n", bytesToHexString(hex_quals));
|
//System.out.printf("SQ field (hex): %s\n", bytesToHexString(hex_quals));
|
||||||
System.out.printf("SAM record: %s\n", read.format());
|
//System.out.printf("SAM record: %s\n", read.format());
|
||||||
|
|
||||||
int hex_qual = hex_quals[offset];
|
int hex_qual = hex_quals[offset];
|
||||||
int called2num = hex_qual & 0x3;
|
int called2num = hex_qual & 0x3;
|
||||||
double qual2 = (double)(hex_qual >> 2) / 100.0;
|
double qual2 = (double)(hex_qual >> 2) / 100.0;
|
||||||
System.out.printf("2ND %x %d %f\n", hex_qual, called2num, qual2);
|
//System.out.printf("2ND %x %d %f\n", hex_qual, called2num, qual2);
|
||||||
quals[i][called2num] = qual2;
|
quals[i][called2num] = qual2;
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
@ -191,7 +196,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
return (char) ((value < 10) ? ('0' + value) : ('A' + value - 10));
|
return (char) ((value < 10) ? ('0' + value) : ('A' + value - 10));
|
||||||
}
|
}
|
||||||
|
|
||||||
public AlleleFrequencyEstimate AlleleFrequencyEstimator(String location, int N, byte[] bases, double[][] quals, int refnum, int altnum, int depth)
|
public AlleleFrequencyEstimate AlleleFrequencyEstimator(GenomeLoc location, int N, byte[] bases, double[][] quals, int refnum, int altnum, int depth)
|
||||||
{
|
{
|
||||||
|
|
||||||
// q = hypothetical %nonref
|
// q = hypothetical %nonref
|
||||||
|
|
@ -251,7 +256,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
Arrays.sort(bestMixtures);
|
Arrays.sort(bestMixtures);
|
||||||
|
|
||||||
// Calculate Lod of the mixture versus other possible
|
// Calculate Lod of the mixture versus other possible
|
||||||
// Answers how confident are we in the best mixture versus the next best mixture
|
// Answers how confident are we in the best mixture versus the nextPosition best mixture
|
||||||
double lodBestVsNextBest = bestMixtures[0].posterior - bestMixtures[1].posterior;
|
double lodBestVsNextBest = bestMixtures[0].posterior - bestMixtures[1].posterior;
|
||||||
|
|
||||||
AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(location,
|
AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(location,
|
||||||
|
|
@ -375,48 +380,42 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String confident_ref_interval_start = "";
|
private String confident_ref_interval_contig = "";
|
||||||
|
private long confident_ref_interval_start = 0;
|
||||||
private double confident_ref_interval_LOD_sum = 0;
|
private double confident_ref_interval_LOD_sum = 0;
|
||||||
private double confident_ref_interval_length = 0;
|
private double confident_ref_interval_length = 0;
|
||||||
private int last_position_considered = -1;
|
private long last_position_considered = -1;
|
||||||
private boolean inside_confident_ref_interval = false;
|
private boolean inside_confident_ref_interval = false;
|
||||||
|
AlleleMetrics metrics;
|
||||||
|
|
||||||
public String reduceInit()
|
public String reduceInit()
|
||||||
{
|
{
|
||||||
confident_ref_interval_start = "";
|
confident_ref_interval_contig = "";
|
||||||
|
confident_ref_interval_start = 0;
|
||||||
confident_ref_interval_LOD_sum = 0;
|
confident_ref_interval_LOD_sum = 0;
|
||||||
confident_ref_interval_length = 0;
|
confident_ref_interval_length = 0;
|
||||||
last_position_considered = -1;
|
last_position_considered = -1;
|
||||||
inside_confident_ref_interval = false;
|
inside_confident_ref_interval = false;
|
||||||
return "";
|
if (LOG_METRICS) metrics = new AlleleMetrics("SNTH");//METRICS_OUTPUT_FILE);
|
||||||
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
|
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
|
||||||
{
|
{
|
||||||
// Print RESULT data for confident calls
|
// Print RESULT data for confident calls
|
||||||
|
|
||||||
String[] tokens;
|
long current_offset = alleleFreq.location.getStart(); //Integer.parseInt(tokens[1]);
|
||||||
tokens = alleleFreq.location.split(":");
|
|
||||||
int current_offset = Integer.parseInt(tokens[1]);
|
|
||||||
|
|
||||||
if (inside_confident_ref_interval &&
|
if (inside_confident_ref_interval &&
|
||||||
((alleleFreq.lodVsRef > -5.0) || (current_offset != last_position_considered + 1)) )
|
((alleleFreq.lodVsRef > -5.0) || (current_offset != last_position_considered + 1)) )
|
||||||
{
|
{
|
||||||
// No longer hom-ref, so output a ref line.
|
// No longer hom-ref, so output a ref line.
|
||||||
tokens = confident_ref_interval_start.split(":");
|
|
||||||
|
|
||||||
String contig = tokens[0];
|
|
||||||
int start = Integer.parseInt(tokens[1]);
|
|
||||||
|
|
||||||
tokens = alleleFreq.location.split(":");
|
|
||||||
int end = last_position_considered;
|
|
||||||
|
|
||||||
double lod = confident_ref_interval_LOD_sum / confident_ref_interval_length;
|
double lod = confident_ref_interval_LOD_sum / confident_ref_interval_length;
|
||||||
|
|
||||||
output.format("%s\tCALLER\tREFERENCE\t%d\t%d\t%f\t.\t.\tLENGTH %d\n",
|
output.format("%s\tCALLER\tREFERENCE\t%d\t%d\t%f\t.\t.\tLENGTH %d\n",
|
||||||
contig,
|
confident_ref_interval_contig,
|
||||||
start,
|
confident_ref_interval_start,
|
||||||
end,
|
last_position_considered,
|
||||||
lod,
|
lod,
|
||||||
(int)(confident_ref_interval_length));
|
(int)(confident_ref_interval_length));
|
||||||
|
|
||||||
|
|
@ -435,7 +434,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
else if ((!inside_confident_ref_interval) && (alleleFreq.lodVsRef <= -5.0))
|
else if ((!inside_confident_ref_interval) && (alleleFreq.lodVsRef <= -5.0))
|
||||||
{
|
{
|
||||||
// We moved into a hom-ref region so start a new interval.
|
// We moved into a hom-ref region so start a new interval.
|
||||||
confident_ref_interval_start = alleleFreq.location;
|
confident_ref_interval_contig = alleleFreq.location.getContig();
|
||||||
|
confident_ref_interval_start = alleleFreq.location.getStart();
|
||||||
confident_ref_interval_LOD_sum = alleleFreq.lodVsRef;
|
confident_ref_interval_LOD_sum = alleleFreq.lodVsRef;
|
||||||
confident_ref_interval_length = 1;
|
confident_ref_interval_length = 1;
|
||||||
inside_confident_ref_interval = true;
|
inside_confident_ref_interval = true;
|
||||||
|
|
@ -444,6 +444,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
last_position_considered = current_offset;
|
last_position_considered = current_offset;
|
||||||
|
|
||||||
if (alleleFreq.lodVsRef >= 5) { this.output.print(alleleFreq.asGFFString()); }
|
if (alleleFreq.lodVsRef >= 5) { this.output.print(alleleFreq.asGFFString()); }
|
||||||
|
if (LOG_METRICS) metrics.printMetricsAtLocusIntervals(1000);
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -492,17 +493,13 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
if (inside_confident_ref_interval)
|
if (inside_confident_ref_interval)
|
||||||
{
|
{
|
||||||
// if we have a confident reference interval still hanging open, close it.
|
// if we have a confident reference interval still hanging open, close it.
|
||||||
String tokens[] = confident_ref_interval_start.split(":");
|
|
||||||
String contig = tokens[0];
|
|
||||||
int start = Integer.parseInt(tokens[1]);
|
|
||||||
int end = last_position_considered;
|
|
||||||
|
|
||||||
double lod = confident_ref_interval_LOD_sum / confident_ref_interval_length;
|
double lod = confident_ref_interval_LOD_sum / confident_ref_interval_length;
|
||||||
|
|
||||||
output.format("%s\tCALLER\tREFERENCE\t%d\t%d\t%f\t.\t.\tLENGTH %d\n",
|
output.format("%s\tCALLER\tREFERENCE\t%d\t%d\t%f\t.\t.\tLENGTH %d\n",
|
||||||
contig,
|
confident_ref_interval_contig,
|
||||||
start,
|
confident_ref_interval_start,
|
||||||
end,
|
last_position_considered,
|
||||||
lod,
|
lod,
|
||||||
(int)(confident_ref_interval_length));
|
(int)(confident_ref_interval_length));
|
||||||
|
|
||||||
|
|
@ -522,6 +519,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
System.exit(-1);
|
System.exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (LOG_METRICS) metrics.printMetrics();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_base_qual_matrix(double [][]quals) {
|
static void print_base_qual_matrix(double [][]quals) {
|
||||||
|
|
@ -569,7 +568,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
{0.001/3.0, 0.999, 0.001/3.0, 0.001/3.0},
|
{0.001/3.0, 0.999, 0.001/3.0, 0.001/3.0},
|
||||||
{0.001/3.0, 0.999, 0.001/3.0, 0.001/3.0}};
|
{0.001/3.0, 0.999, 0.001/3.0, 0.001/3.0}};
|
||||||
AlleleFrequencyWalker w = new AlleleFrequencyWalker();
|
AlleleFrequencyWalker w = new AlleleFrequencyWalker();
|
||||||
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
|
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator(null, N, het_bases, het_quals, 0, 1, 20);
|
||||||
System.out.print(String.format("50%% Het : %s %c %c %f %f %f %d %s\n",
|
System.out.print(String.format("50%% Het : %s %c %c %f %f %f %d %s\n",
|
||||||
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
|
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
|
||||||
}
|
}
|
||||||
|
|
@ -686,7 +685,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
int N = 10;
|
int N = 10;
|
||||||
AlleleFrequencyWalker w = new AlleleFrequencyWalker();
|
AlleleFrequencyWalker w = new AlleleFrequencyWalker();
|
||||||
w.N = 10;
|
w.N = 10;
|
||||||
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
|
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator(null, N, het_bases, het_quals, 0, 1, 20);
|
||||||
System.out.print(String.format("10%% Het : %s %c %c %f %f %f %d %s\n",
|
System.out.print(String.format("10%% Het : %s %c %c %f %f %f %d %s\n",
|
||||||
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
|
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,27 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: andrewk
|
||||||
|
* Date: Apr 2, 2009
|
||||||
|
* Time: 9:01:44 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class AlleleMetricsWalker {
|
||||||
|
// Class that will walk over various metrics in a reference ordered way
|
||||||
|
// This class walks over the genome in reference order and calls AlleleMetrics on each class
|
||||||
|
// Hapmap and dbSNP tracks are taken from the command line
|
||||||
|
// At first pass, this will at least be able to walk over a GFF file and compare to the hapmap and dbsnp
|
||||||
|
// tracks specified on the command line and handed in via the LocusContext
|
||||||
|
|
||||||
|
public void map(List<AllelicVariant> avdata) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,10 +1,11 @@
|
||||||
package org.broadinstitute.sting.playground.utils;
|
package org.broadinstitute.sting.playground.utils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
|
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
public class AlleleFrequencyEstimate {
|
public class AlleleFrequencyEstimate {
|
||||||
//AlleleFrequencyEstimate();
|
//AlleleFrequencyEstimate();
|
||||||
public String location;
|
public GenomeLoc location;
|
||||||
public char ref;
|
public char ref;
|
||||||
public char alt;
|
public char alt;
|
||||||
public int N;
|
public int N;
|
||||||
|
|
@ -15,7 +16,9 @@ public class AlleleFrequencyEstimate {
|
||||||
public int depth;
|
public int depth;
|
||||||
public String notes;
|
public String notes;
|
||||||
|
|
||||||
public AlleleFrequencyEstimate(String location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, int depth)
|
GenomeLoc l;
|
||||||
|
|
||||||
|
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, int depth)
|
||||||
{
|
{
|
||||||
this.location = location;
|
this.location = location;
|
||||||
this.ref = ref;
|
this.ref = ref;
|
||||||
|
|
@ -31,12 +34,10 @@ public class AlleleFrequencyEstimate {
|
||||||
|
|
||||||
public String asGFFString()
|
public String asGFFString()
|
||||||
{
|
{
|
||||||
String[] tokens;
|
|
||||||
tokens = location.split(":");
|
|
||||||
return String.format("%s\tCALLER\tVARIANT\t%s\t%s\t%f\t.\t.\tREF %c\t;\tALT %c\t;\tFREQ %f\n",
|
return String.format("%s\tCALLER\tVARIANT\t%s\t%s\t%f\t.\t.\tREF %c\t;\tALT %c\t;\tFREQ %f\n",
|
||||||
tokens[0],
|
location.getContig(),
|
||||||
tokens[1],
|
location.getStart(),
|
||||||
tokens[1],
|
location.getStart(),
|
||||||
lodVsRef,
|
lodVsRef,
|
||||||
ref,
|
ref,
|
||||||
alt,
|
alt,
|
||||||
|
|
|
||||||
|
|
@ -1,25 +1,21 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
package org.broadinstitute.sting.playground.utils;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
|
||||||
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
|
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
|
||||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: andrewk
|
* User: andrewk
|
||||||
* Date: Mar 18, 2009
|
* Date: Apr 1, 2009
|
||||||
* Time: 5:28:58 PM
|
* Time: 5:53:21 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
|
public class AlleleMetrics {
|
||||||
public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEstimate, String>
|
|
||||||
{
|
|
||||||
|
|
||||||
long dbsnp_hits=0;
|
long dbsnp_hits=0;
|
||||||
long num_variants=0;
|
long num_variants=0;
|
||||||
|
|
@ -31,12 +27,24 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
||||||
long hapmap_refvar_correct = 0;
|
long hapmap_refvar_correct = 0;
|
||||||
long hapmap_refvar_incorrect = 0;
|
long hapmap_refvar_incorrect = 0;
|
||||||
|
|
||||||
AlleleFrequencyWalker caller;
|
protected PrintStream out;
|
||||||
|
|
||||||
public AlleleFrequencyEstimate map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context)
|
public AlleleMetrics(String MetricsOutputFile) {
|
||||||
{
|
try
|
||||||
AlleleFrequencyEstimate alleleFreq = caller.map(rodData, ref, context);
|
{
|
||||||
|
/*if ( MetricsOutputFile.equals("-") )
|
||||||
|
this.out = out;
|
||||||
|
else*/
|
||||||
|
this.out = new PrintStream(MetricsOutputFile);
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
e.printStackTrace();
|
||||||
|
System.exit(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void nextPosition(AlleleFrequencyEstimate alleleFreq, List<ReferenceOrderedDatum> rodData) {
|
||||||
num_loci_total += 1;
|
num_loci_total += 1;
|
||||||
|
|
||||||
boolean is_dbSNP_SNP = false;
|
boolean is_dbSNP_SNP = false;
|
||||||
|
|
@ -47,7 +55,7 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
||||||
{
|
{
|
||||||
if ( datum != null )
|
if ( datum != null )
|
||||||
{
|
{
|
||||||
if ( datum instanceof rodDbSNP )
|
if ( datum instanceof rodDbSNP)
|
||||||
{
|
{
|
||||||
rodDbSNP dbsnp = (rodDbSNP)datum;
|
rodDbSNP dbsnp = (rodDbSNP)datum;
|
||||||
if (dbsnp.isSNP()) is_dbSNP_SNP = true;
|
if (dbsnp.isSNP()) is_dbSNP_SNP = true;
|
||||||
|
|
@ -64,9 +72,9 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
||||||
if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) { num_loci_confident += 1; }
|
if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) { num_loci_confident += 1; }
|
||||||
|
|
||||||
if (alleleFreq.qstar > 0.0 && alleleFreq.lodVsRef >= LOD_cutoff)
|
if (alleleFreq.qstar > 0.0 && alleleFreq.lodVsRef >= LOD_cutoff)
|
||||||
{
|
{
|
||||||
// Confident variant.
|
// Confident variant.
|
||||||
|
|
||||||
num_variants += 1;
|
num_variants += 1;
|
||||||
|
|
||||||
if (is_dbSNP_SNP)
|
if (is_dbSNP_SNP)
|
||||||
|
|
@ -105,7 +113,8 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
||||||
hapmap_genotype_correct++;
|
hapmap_genotype_correct++;
|
||||||
}else{
|
}else{
|
||||||
hapmap_genotype_incorrect++;
|
hapmap_genotype_incorrect++;
|
||||||
System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
|
//System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
|
||||||
|
System.out.printf(" INCORRECT GENOTYPE");
|
||||||
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
|
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -118,16 +127,14 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
||||||
boolean called_var = alleleFreq.qstar != 0.0;
|
boolean called_var = alleleFreq.qstar != 0.0;
|
||||||
if (hapmap_q != -1 && hapmap_var != called_var) {
|
if (hapmap_q != -1 && hapmap_var != called_var) {
|
||||||
hapmap_refvar_incorrect++;
|
hapmap_refvar_incorrect++;
|
||||||
|
System.out.printf(" INCORRECT REFVAR CALL");
|
||||||
}else{
|
}else{
|
||||||
hapmap_refvar_correct++;
|
hapmap_refvar_correct++;
|
||||||
System.out.printf(" INCORRECT REFVAR CALL Bases: %s\n", AlleleFrequencyWalker.getBases(context));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
out.print("\n");
|
out.print("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
return alleleFreq;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void printMetrics()
|
public void printMetrics()
|
||||||
|
|
@ -159,23 +166,9 @@ public class AlleleFrequencyMetricsWalker extends LocusWalker<AlleleFrequencyEst
|
||||||
out.println();
|
out.println();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone(String result)
|
public void printMetricsAtLocusIntervals(int loci_interval) {
|
||||||
{
|
if (num_loci_total % loci_interval == 0) printMetrics();
|
||||||
printMetrics();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String reduceInit()
|
|
||||||
{
|
|
||||||
caller = new AlleleFrequencyWalker();
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
|
|
||||||
{
|
|
||||||
if ((alleleFreq.lodVsRef >= 5) || (alleleFreq.lodVsRef <= -5)) { System.out.print(alleleFreq.asGFFString()); }
|
|
||||||
if (this.num_loci_total % 1000 == 0) { printMetrics(); }
|
|
||||||
return "null";
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue