VQSR now operates on LOD scores in the INFO field directly, and doesn't adjust the QUAL field. New format for tranches file uses LOD score. Old file format no longer supported. log10sumlog10() function, a very useful utility in MathUtils. No more ExtendedPileupElement! Robust math calculations in GMM so that no infinities are generated! HaplotypeScore refactored to enable use of filtered context. Not yet enabled... InferredContext getDouble and getInteger arguments now parse values from Strings if necessary
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4684 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5b83942cee
commit
ef2f6d90d2
|
|
@ -92,13 +92,10 @@ public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec<
|
||||||
|
|
||||||
GenomeLoc loc;
|
GenomeLoc loc;
|
||||||
String chr = st.nextToken();
|
String chr = st.nextToken();
|
||||||
if ( chr.indexOf(":") != -1 ) {
|
if ( chr.indexOf(":") != -1 )
|
||||||
loc = genomeLocParser.parseGenomeInterval(chr);
|
loc = genomeLocParser.parseGenomeInterval(chr);
|
||||||
} else {
|
else
|
||||||
if ( st.countTokens() < 3 )
|
|
||||||
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
|
|
||||||
loc = genomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken()));
|
loc = genomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken()));
|
||||||
}
|
|
||||||
return new AnnotatorInputTableFeature(loc.getContig(), (int)loc.getStart(), (int)loc.getStop());
|
return new AnnotatorInputTableFeature(loc.getContig(), (int)loc.getStart(), (int)loc.getStop());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.pileup.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
|
|
@ -47,6 +48,11 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 20;
|
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 20;
|
||||||
private final static char REGEXP_WILDCARD = '.';
|
private final static char REGEXP_WILDCARD = '.';
|
||||||
|
|
||||||
|
public boolean useRead(PileupElement p) {
|
||||||
|
return ! ReadUtils.is454Read(p.getRead());
|
||||||
|
//return ! (p.getRead() instanceof GATKSAMRecord && ! ((GATKSAMRecord)p.getRead()).isGoodBase(p.getOffset())) && ! ReadUtils.is454Read(p.getRead());
|
||||||
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( !vc.isBiallelic() || !vc.isSNP() || stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
if ( !vc.isBiallelic() || !vc.isSNP() || stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -88,15 +94,11 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
|
ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
|
||||||
PriorityQueue<Haplotype> haplotypeQueue = new PriorityQueue<Haplotype>(100, new HaplotypeComparator());
|
PriorityQueue<Haplotype> haplotypeQueue = new PriorityQueue<Haplotype>(100, new HaplotypeComparator());
|
||||||
|
|
||||||
|
for ( PileupElement p : pileup ) {
|
||||||
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
if ( useRead(p) ) {
|
||||||
if (ReadUtils.is454Read(p.getRead()))
|
Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize);
|
||||||
continue;
|
haplotypeQueue.add(haplotypeFromRead);
|
||||||
Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize);
|
}
|
||||||
|
|
||||||
|
|
||||||
haplotypeQueue.add(haplotypeFromRead);
|
|
||||||
//haplotypeList.add(haplotypeFromRead);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now that priority queue has been built with all reads at context, we need to merge and find possible segregating haplotypes
|
// Now that priority queue has been built with all reads at context, we need to merge and find possible segregating haplotypes
|
||||||
|
|
@ -161,7 +163,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Haplotype getHaplotypeFromRead(ExtendedPileupElement p, int contextSize) {
|
private Haplotype getHaplotypeFromRead(PileupElement p, int contextSize) {
|
||||||
SAMRecord read = p.getRead();
|
SAMRecord read = p.getRead();
|
||||||
int readOffsetFromPileup = p.getOffset();
|
int readOffsetFromPileup = p.getOffset();
|
||||||
int baseOffsetStart = readOffsetFromPileup - (contextSize - 1)/2;
|
int baseOffsetStart = readOffsetFromPileup - (contextSize - 1)/2;
|
||||||
|
|
@ -239,24 +241,28 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
// if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(1));
|
// if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(1));
|
||||||
|
|
||||||
double[][] haplotypeScores = new double[pileup.size()][haplotypes.size()];
|
double[][] haplotypeScores = new double[pileup.size()][haplotypes.size()];
|
||||||
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
int pileupOffset = 0;
|
||||||
SAMRecord read = p.getRead();
|
for ( PileupElement p : pileup ) {
|
||||||
int readOffsetFromPileup = p.getOffset();
|
if ( useRead(p) ) {
|
||||||
|
SAMRecord read = p.getRead();
|
||||||
|
int readOffsetFromPileup = p.getOffset();
|
||||||
|
|
||||||
if (ReadUtils.is454Read(read))
|
if (ReadUtils.is454Read(read))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
|
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
|
||||||
double m = 10000000;
|
double m = 10000000;
|
||||||
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
||||||
Haplotype haplotype = haplotypes.get(i);
|
Haplotype haplotype = haplotypes.get(i);
|
||||||
int start = readOffsetFromPileup - (contextSize - 1)/2;
|
int start = readOffsetFromPileup - (contextSize - 1)/2;
|
||||||
double score = scoreReadAgainstHaplotype(read, start, contextSize, haplotype);
|
double score = scoreReadAgainstHaplotype(read, start, contextSize, haplotype);
|
||||||
haplotypeScores[p.getPileupOffset()][i] = score;
|
haplotypeScores[pileupOffset][i] = score;
|
||||||
if ( DEBUG ) System.out.printf(" vs. haplotype %d = %f%n", i, score);
|
if ( DEBUG ) System.out.printf(" vs. haplotype %d = %f%n", i, score);
|
||||||
m = Math.min(score, m);
|
m = Math.min(score, m);
|
||||||
|
}
|
||||||
|
if ( DEBUG ) System.out.printf(" => best score was %f%n", m);
|
||||||
}
|
}
|
||||||
if ( DEBUG ) System.out.printf(" => best score was %f%n", m);
|
pileupOffset++; // todo -- remove me
|
||||||
}
|
}
|
||||||
|
|
||||||
double overallScore = 0.0;
|
double overallScore = 0.0;
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
|
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.report.ReportMarshaller;
|
import org.broadinstitute.sting.utils.report.ReportMarshaller;
|
||||||
import org.broadinstitute.sting.utils.report.VE2ReportFactory;
|
import org.broadinstitute.sting.utils.report.VE2ReportFactory;
|
||||||
|
|
@ -307,7 +308,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
// we are going to build a few select names automatically from the tranches file
|
// we are going to build a few select names automatically from the tranches file
|
||||||
for ( Tranche t : Tranche.readTraches(new File(TRANCHE_FILENAME)) ) {
|
for ( Tranche t : Tranche.readTraches(new File(TRANCHE_FILENAME)) ) {
|
||||||
logger.info("Adding select for all variant above the pCut of : " + t);
|
logger.info("Adding select for all variant above the pCut of : " + t);
|
||||||
SELECT_EXPS.add(String.format("QUAL >= %.2f", t.pCut));
|
SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod));
|
||||||
SELECT_NAMES.add(String.format("FDR-%.2f", t.fdr));
|
SELECT_NAMES.add(String.format("FDR-%.2f", t.fdr));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -100,11 +100,11 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
|
||||||
if( tranches.size() >= 2 ) {
|
if( tranches.size() >= 2 ) {
|
||||||
for( int iii = 0; iii < tranches.size() - 1; iii++ ) {
|
for( int iii = 0; iii < tranches.size() - 1; iii++ ) {
|
||||||
Tranche t = tranches.get(iii);
|
Tranche t = tranches.get(iii);
|
||||||
hInfo.add(new VCFFilterHeaderLine(t.name, String.format("FDR tranche level at qual: " + t.pCut + " <= x < " + tranches.get(iii+1).pCut)));
|
hInfo.add(new VCFFilterHeaderLine(t.name, String.format("FDR tranche level at VSQ Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii+1).minVQSLod)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if( tranches.size() >= 1 ) {
|
if( tranches.size() >= 1 ) {
|
||||||
hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("FDR tranche level at qual < " + tranches.get(0).pCut)));
|
hInfo.add(new VCFFilterHeaderLine(tranches.get(0).name + "+", String.format("FDR tranche level at VQS Lod < " + tranches.get(0).minVQSLod)));
|
||||||
} else {
|
} else {
|
||||||
throw new UserException("No tranches were found in the file or were above the FDR Filter level " + FDR_FILTER_LEVEL);
|
throw new UserException("No tranches were found in the file or were above the FDR Filter level " + FDR_FILTER_LEVEL);
|
||||||
}
|
}
|
||||||
|
|
@ -131,28 +131,31 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
|
||||||
if( vc != null && !vc.getSource().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) && vc.isSNP() ) {
|
if( vc != null && !vc.getSource().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) && vc.isSNP() ) {
|
||||||
String filterString = null;
|
String filterString = null;
|
||||||
if( !vc.isFiltered() ) {
|
if( !vc.isFiltered() ) {
|
||||||
final double qual = vc.getPhredScaledQual();
|
try {
|
||||||
|
final double lod = vc.getAttributeAsDouble(VariantRecalibrator.VQS_LOD_KEY);
|
||||||
for( int i = tranches.size() - 1; i >= 0; i-- ) {
|
for( int i = tranches.size() - 1; i >= 0; i-- ) {
|
||||||
Tranche tranche = tranches.get(i);
|
Tranche tranche = tranches.get(i);
|
||||||
if( qual >= tranche.pCut ) {
|
if( lod >= tranche.minVQSLod ) {
|
||||||
if (i == tranches.size() - 1) {
|
if (i == tranches.size() - 1) {
|
||||||
filterString = VCFConstants.PASSES_FILTERS_v4;
|
filterString = VCFConstants.PASSES_FILTERS_v4;
|
||||||
} else {
|
} else {
|
||||||
filterString = tranche.name;
|
filterString = tranche.name;
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if( filterString == null ) {
|
if( filterString == null ) {
|
||||||
filterString = tranches.get(0).name+"+";
|
filterString = tranches.get(0).name+"+";
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
|
if ( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
|
||||||
Set<String> filters = new HashSet<String>();
|
Set<String> filters = new HashSet<String>();
|
||||||
filters.add(filterString);
|
filters.add(filterString);
|
||||||
vc = VariantContext.modifyFilters(vc, filters);
|
vc = VariantContext.modifyFilters(vc, filters);
|
||||||
|
}
|
||||||
|
} catch ( ClassCastException e ) {
|
||||||
|
throw new UserException.MalformedFile(vc.getSource(), "Invalid value for VQS key " + VariantRecalibrator.VQS_LOD_KEY + " at variant " + vc, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -183,7 +183,6 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
|
||||||
|
|
||||||
final VariantDatum variantDatum = new VariantDatum();
|
final VariantDatum variantDatum = new VariantDatum();
|
||||||
variantDatum.annotations = annotationValues;
|
variantDatum.annotations = annotationValues;
|
||||||
variantDatum.qual = vc.getPhredScaledQual();
|
|
||||||
|
|
||||||
final DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
final DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
||||||
final Collection<VariantContext> vcsHapMap = tracker.getVariantContexts(ref, "hapmap", null, context.getLocation(), false, true);
|
final Collection<VariantContext> vcsHapMap = tracker.getVariantContexts(ref, "hapmap", null, context.getLocation(), false, true);
|
||||||
|
|
@ -201,7 +200,7 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
|
||||||
variantDatum.weight = WEIGHT_DBSNP;
|
variantDatum.weight = WEIGHT_DBSNP;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( variantDatum.weight > 0.0 && variantDatum.qual > QUAL_THRESHOLD ) {
|
if( variantDatum.weight > 0.0 && vc.getPhredScaledQual() > QUAL_THRESHOLD ) {
|
||||||
mapList.add( variantDatum );
|
mapList.add( variantDatum );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -49,33 +49,35 @@ import java.util.*;
|
||||||
public class Tranche implements Comparable<Tranche> {
|
public class Tranche implements Comparable<Tranche> {
|
||||||
private static final int CURRENT_VERSION = 2;
|
private static final int CURRENT_VERSION = 2;
|
||||||
|
|
||||||
public double fdr, pCut, targetTiTv, knownTiTv, novelTiTv;
|
public double fdr, minVQSLod, targetTiTv, knownTiTv, novelTiTv;
|
||||||
public int numKnown,numNovel;
|
public int numKnown,numNovel;
|
||||||
public String name;
|
public String name;
|
||||||
|
|
||||||
public Tranche(double fdr, double targetTiTv, double pCut, int numKnown, double knownTiTv, int numNovel, double novelTiTv) {
|
public Tranche(double fdr, double targetTiTv, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv) {
|
||||||
this(fdr, targetTiTv, pCut, numKnown, knownTiTv, numNovel, novelTiTv, "anonymous");
|
this(fdr, targetTiTv, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, "anonymous");
|
||||||
}
|
}
|
||||||
|
|
||||||
public Tranche(double fdr, double targetTiTv, double pCut, int numKnown, double knownTiTv, int numNovel, double novelTiTv, String name) {
|
public Tranche(double fdr, double targetTiTv, double minVQSLod, int numKnown, double knownTiTv, int numNovel, double novelTiTv, String name) {
|
||||||
this.fdr = fdr;
|
this.fdr = fdr;
|
||||||
this.targetTiTv = targetTiTv;
|
this.targetTiTv = targetTiTv;
|
||||||
this.pCut = pCut;
|
this.minVQSLod = minVQSLod;
|
||||||
this.novelTiTv = novelTiTv;
|
this.novelTiTv = novelTiTv;
|
||||||
this.numNovel = numNovel;
|
this.numNovel = numNovel;
|
||||||
this.knownTiTv = knownTiTv;
|
this.knownTiTv = knownTiTv;
|
||||||
this.numKnown = numKnown;
|
this.numKnown = numKnown;
|
||||||
this.name = name;
|
this.name = name;
|
||||||
|
|
||||||
// if ( targetTiTv < 0.5 || targetTiTv > 10 )
|
if ( fdr <= 0.0 )
|
||||||
// throw new UserException("Target Ti/Tv ratio is unreasonable " + targetTiTv);
|
throw new UserException("Target FDR is unreasonable " + fdr);
|
||||||
//
|
|
||||||
// if ( numKnown < 0 || numNovel < 0)
|
if ( targetTiTv < 0.5 || targetTiTv > 10 )
|
||||||
// throw new ReviewedStingException("Invalid tranch - no. variants is < 0 : known " + numKnown + " novel " + numNovel);
|
throw new UserException("Target Ti/Tv ratio is unreasonable " + targetTiTv);
|
||||||
|
|
||||||
|
if ( numKnown < 0 || numNovel < 0)
|
||||||
|
throw new ReviewedStingException("Invalid tranch - no. variants is < 0 : known " + numKnown + " novel " + numNovel);
|
||||||
|
|
||||||
if ( name == null )
|
if ( name == null )
|
||||||
throw new ReviewedStingException("BUG -- name cannot be null");
|
throw new ReviewedStingException("BUG -- name cannot be null");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int compareTo(Tranche other) {
|
public int compareTo(Tranche other) {
|
||||||
|
|
@ -83,8 +85,8 @@ public class Tranche implements Comparable<Tranche> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("Tranche fdr=%.2f minQual=%.2f known=(%d @ %.2f) novel=(%d @ %.2f) name=%s]",
|
return String.format("Tranche fdr=%.2f minVQSLod=%.4f known=(%d @ %.2f) novel=(%d @ %.2f) name=%s]",
|
||||||
fdr, pCut, numKnown, knownTiTv, numNovel, novelTiTv, name);
|
fdr, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -103,12 +105,12 @@ public class Tranche implements Comparable<Tranche> {
|
||||||
|
|
||||||
stream.println("# Variant quality score tranches file");
|
stream.println("# Variant quality score tranches file");
|
||||||
stream.println("# Version number " + CURRENT_VERSION);
|
stream.println("# Version number " + CURRENT_VERSION);
|
||||||
stream.println("FDRtranche,targetTiTv,numKnown,numNovel,knownTiTv,novelTiTv,pCut,filterName");
|
stream.println("FDRtranche,targetTiTv,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName");
|
||||||
|
|
||||||
Tranche prev = null;
|
Tranche prev = null;
|
||||||
for ( Tranche t : tranches ) {
|
for ( Tranche t : tranches ) {
|
||||||
stream.printf("%.2f,%.2f,%d,%d,%.4f,%.4f,%.2f,FDRtranche%.2fto%.2f%n",
|
stream.printf("%.2f,%.2f,%d,%d,%.4f,%.4f,%.4f,FDRtranche%.2fto%.2f%n",
|
||||||
t.fdr,t.targetTiTv,t.numKnown,t.numNovel,t.knownTiTv,t.novelTiTv, t.pCut,
|
t.fdr,t.targetTiTv,t.numKnown,t.numNovel,t.knownTiTv,t.novelTiTv, t.minVQSLod,
|
||||||
(prev == null ? 0.0 : prev.fdr), t.fdr);
|
(prev == null ? 0.0 : prev.fdr), t.fdr);
|
||||||
prev = t;
|
prev = t;
|
||||||
}
|
}
|
||||||
|
|
@ -117,8 +119,10 @@ public class Tranche implements Comparable<Tranche> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static double getDouble(Map<String,String> bindings, String key, boolean required) {
|
private static double getDouble(Map<String,String> bindings, String key, boolean required) {
|
||||||
if ( bindings.containsKey(key) )
|
if ( bindings.containsKey(key) ) {
|
||||||
return Double.valueOf(bindings.get(key));
|
String val = bindings.get(key);
|
||||||
|
return Double.valueOf(val);
|
||||||
|
}
|
||||||
else if ( required ) {
|
else if ( required ) {
|
||||||
throw new UserException.MalformedFile("Malformed tranches file. Missing required key " + key);
|
throw new UserException.MalformedFile("Malformed tranches file. Missing required key " + key);
|
||||||
}
|
}
|
||||||
|
|
@ -154,6 +158,11 @@ public class Tranche implements Comparable<Tranche> {
|
||||||
final String[] vals = line.split(",");
|
final String[] vals = line.split(",");
|
||||||
if( header == null ) {
|
if( header == null ) {
|
||||||
header = vals;
|
header = vals;
|
||||||
|
if ( header.length == 5 )
|
||||||
|
// old style tranches file, throw an error
|
||||||
|
throw new UserException.MalformedFile(f, "Unfortuanately, your tranches file is from a previous version of this tool and cannot be used with the latest code. Please rerun VariantRecalibrator");
|
||||||
|
if ( header.length != 8 )
|
||||||
|
throw new UserException.MalformedFile(f, "Expected 8 elements in header line " + line);
|
||||||
} else {
|
} else {
|
||||||
if ( header.length != vals.length )
|
if ( header.length != vals.length )
|
||||||
throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + " line " + line);
|
throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + " line " + line);
|
||||||
|
|
@ -162,11 +171,11 @@ public class Tranche implements Comparable<Tranche> {
|
||||||
for ( int i = 0; i < vals.length; i++ ) bindings.put(header[i], vals[i]);
|
for ( int i = 0; i < vals.length; i++ ) bindings.put(header[i], vals[i]);
|
||||||
tranches.add(new Tranche(getDouble(bindings,"FDRtranche", true),
|
tranches.add(new Tranche(getDouble(bindings,"FDRtranche", true),
|
||||||
getDouble(bindings,"targetTiTv", false),
|
getDouble(bindings,"targetTiTv", false),
|
||||||
getDouble(bindings,"pCut", true),
|
getDouble(bindings,"minVQSLod", true),
|
||||||
getInteger(bindings,"numKnown", false),
|
getInteger(bindings,"numKnown", false),
|
||||||
getDouble(bindings,"knownTiTv", false),
|
getDouble(bindings,"knownTiTv", false),
|
||||||
getInteger(bindings,"numNovel", true),
|
getInteger(bindings,"numNovel", true),
|
||||||
Math.max(getDouble(bindings,"novelTiTv", false), getDouble(bindings,"novelTITV", false)),
|
getDouble(bindings,"novelTiTv", true),
|
||||||
bindings.get("filterName")));
|
bindings.get("filterName")));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -36,10 +36,10 @@ public class VariantDatum implements Comparable<VariantDatum> {
|
||||||
public double[] annotations;
|
public double[] annotations;
|
||||||
public boolean isTransition;
|
public boolean isTransition;
|
||||||
public boolean isKnown;
|
public boolean isKnown;
|
||||||
public double qual;
|
public double lod;
|
||||||
public double weight;
|
public double weight;
|
||||||
|
|
||||||
public int compareTo(VariantDatum other) {
|
public int compareTo(VariantDatum other) {
|
||||||
return Double.compare(this.qual, other.qual);
|
return Double.compare(this.lod, other.lod);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -444,7 +444,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
}
|
}
|
||||||
|
|
||||||
public final double evaluateVariant( GenomeLocParser genomeLocParser, final VariantContext vc ) {
|
public final double evaluateVariant( GenomeLocParser genomeLocParser, final VariantContext vc ) {
|
||||||
final double[] pVarInCluster = new double[maxGaussians];
|
final double[] log10pVarInCluster = new double[maxGaussians];
|
||||||
final double[] annotations = new double[dataManager.numAnnotations];
|
final double[] annotations = new double[dataManager.numAnnotations];
|
||||||
|
|
||||||
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
|
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
|
||||||
|
|
@ -452,14 +452,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
annotations[jjj] = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
|
annotations[jjj] = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
|
||||||
}
|
}
|
||||||
|
|
||||||
evaluateGaussiansForSingleVariant( annotations, pVarInCluster );
|
return evaluateGaussiansForSingleVariant( annotations, log10pVarInCluster );
|
||||||
|
|
||||||
double sum = 0.0;
|
|
||||||
for( int kkk = 0; kkk < maxGaussians; kkk++ ) {
|
|
||||||
sum += pVarInCluster[kkk];
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -474,7 +467,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
public final static List<Tranche> findTranches( final VariantDatum[] data, final double[] FDRtranches, double targetTiTv, File debugFile ) {
|
public final static List<Tranche> findTranches( final VariantDatum[] data, final double[] FDRtranches, double targetTiTv, File debugFile ) {
|
||||||
logger.info(String.format("Finding tranches for %d variants with %d FDRs and a target TiTv of %.2f", data.length, FDRtranches.length, targetTiTv));
|
logger.info(String.format("Finding tranches for %d variants with %d FDRs and a target TiTv of %.2f", data.length, FDRtranches.length, targetTiTv));
|
||||||
|
|
||||||
List<VariantDatum> tranchesData = sortVariantsbyQual(data);
|
List<VariantDatum> tranchesData = sortVariantsbyLod(data);
|
||||||
double[] runningTiTv = calculateRunningTiTv(tranchesData);
|
double[] runningTiTv = calculateRunningTiTv(tranchesData);
|
||||||
|
|
||||||
if ( debugFile != null) { writeTranchesDebuggingInfo(debugFile, tranchesData, runningTiTv); }
|
if ( debugFile != null) { writeTranchesDebuggingInfo(debugFile, tranchesData, runningTiTv); }
|
||||||
|
|
@ -502,14 +495,14 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
out.println("Qual isTransition runningTiTv");
|
out.println("Qual isTransition runningTiTv");
|
||||||
for ( int i = 0; i < runningTiTv.length; i++ ) {
|
for ( int i = 0; i < runningTiTv.length; i++ ) {
|
||||||
VariantDatum d = tranchesData.get(i);
|
VariantDatum d = tranchesData.get(i);
|
||||||
out.printf("%.4f %d %.4f%n", d.qual, d.isTransition ? 1 : 0, runningTiTv[i]);
|
out.printf("%.4f %d %.4f%n", d.lod, d.isTransition ? 1 : 0, runningTiTv[i]);
|
||||||
}
|
}
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
throw new UserException.CouldNotCreateOutputFile(f, e);
|
throw new UserException.CouldNotCreateOutputFile(f, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static List<VariantDatum> sortVariantsbyQual(final VariantDatum[] data) {
|
private final static List<VariantDatum> sortVariantsbyLod(final VariantDatum[] data) {
|
||||||
List<VariantDatum> sorted = new ArrayList<VariantDatum>(Arrays.asList(data));
|
List<VariantDatum> sorted = new ArrayList<VariantDatum>(Arrays.asList(data));
|
||||||
Collections.sort(sorted);
|
Collections.sort(sorted);
|
||||||
return sorted;
|
return sorted;
|
||||||
|
|
@ -555,10 +548,10 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
public final static Tranche trancheOfVariants( final List<VariantDatum> data, int minI, double fdr, double targetTiTv ) {
|
public final static Tranche trancheOfVariants( final List<VariantDatum> data, int minI, double fdr, double targetTiTv ) {
|
||||||
int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;
|
int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;
|
||||||
|
|
||||||
double qualThreshold = data.get(minI).qual;
|
double minLod = data.get(minI).lod;
|
||||||
VariantDatum last = null;
|
VariantDatum last = null;
|
||||||
for ( VariantDatum datum : data ) {
|
for ( VariantDatum datum : data ) {
|
||||||
if ( datum.qual >= qualThreshold ) {
|
if ( datum.lod >= minLod ) {
|
||||||
//if( ! datum.isKnown ) System.out.println(datum.pos);
|
//if( ! datum.isKnown ) System.out.println(datum.pos);
|
||||||
if ( datum.isKnown ) {
|
if ( datum.isKnown ) {
|
||||||
numKnown++;
|
numKnown++;
|
||||||
|
|
@ -575,7 +568,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
double knownTiTv = knownTi / Math.max(1.0 * knownTv, 1.0);
|
double knownTiTv = knownTi / Math.max(1.0 * knownTv, 1.0);
|
||||||
double novelTiTv = novelTi / Math.max(1.0 * novelTv, 1.0);
|
double novelTiTv = novelTi / Math.max(1.0 * novelTv, 1.0);
|
||||||
|
|
||||||
return new Tranche(fdr, targetTiTv, qualThreshold, numKnown, knownTiTv, numNovel, novelTiTv);
|
return new Tranche(fdr, targetTiTv, minLod, numKnown, knownTiTv, numNovel, novelTiTv);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final static double fdrToTiTv(double desiredFDR, double targetTiTv) {
|
public final static double fdrToTiTv(double desiredFDR, double targetTiTv) {
|
||||||
|
|
@ -666,8 +659,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
return likelihood / sumWeight;
|
return likelihood / sumWeight;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private double evaluateGaussiansForSingleVariant( final double[] annotations, final double[] log10pVarInCluster ) {
|
||||||
private void evaluateGaussiansForSingleVariant( final double[] annotations, final double[] pVarInCluster ) {
|
|
||||||
|
|
||||||
final int numAnnotations = annotations.length;
|
final int numAnnotations = annotations.length;
|
||||||
|
|
||||||
|
|
@ -689,10 +681,12 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
|
|
||||||
final double denomLog10 = CONSTANT_GAUSSIAN_DENOM_LOG10 + sqrtDeterminantLog10[kkk];
|
final double denomLog10 = CONSTANT_GAUSSIAN_DENOM_LOG10 + sqrtDeterminantLog10[kkk];
|
||||||
final double evalGaussianPDFLog10 = (( -0.5 * sum ) / Math.log(10.0)) - denomLog10;
|
final double evalGaussianPDFLog10 = (( -0.5 * sum ) / Math.log(10.0)) - denomLog10;
|
||||||
final double pVar1 = Math.pow(10.0, pClusterLog10[kkk] + evalGaussianPDFLog10);
|
//final double pVar1 = Math.pow(10.0, pClusterLog10[kkk] + evalGaussianPDFLog10);
|
||||||
pVarInCluster[kkk] = pVar1;
|
final double pVar1 = pClusterLog10[kkk] + evalGaussianPDFLog10;
|
||||||
|
log10pVarInCluster[kkk] = pVar1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return MathUtils.log10sumLog10(log10pVarInCluster);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -57,6 +58,8 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDatum>, ExpandingArrayList<VariantDatum>> {
|
public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDatum>, ExpandingArrayList<VariantDatum>> {
|
||||||
|
public static final String VQS_LOD_KEY = "VQSLOD";
|
||||||
|
public static final double SMALLEST_LOG10_PVAR = -1e6;
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Inputs
|
// Inputs
|
||||||
|
|
@ -115,13 +118,9 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "qual", shortName = "qual", doc = "Don't use sites with original quality scores below the qual threshold. FOR DEBUGGING PURPOSES ONLY.", required=false)
|
@Argument(fullName = "qual", shortName = "qual", doc = "Don't use sites with original quality scores below the qual threshold. FOR DEBUGGING PURPOSES ONLY.", required=false)
|
||||||
private double QUAL_THRESHOLD = 0.0;
|
private double QUAL_THRESHOLD = 0.0;
|
||||||
@Hidden
|
|
||||||
@Argument(fullName="quality_scale_factor", shortName="qScaleFactor", doc="Multiply all final quality scores by this value. FOR DEBUGGING PURPOSES ONLY.", required=false)
|
|
||||||
private double QUALITY_SCALE_FACTOR = 1.0;
|
|
||||||
@Argument(fullName = "debugFile", shortName = "debugFile", doc = "Print debugging information here", required=false)
|
@Argument(fullName = "debugFile", shortName = "debugFile", doc = "Print debugging information here", required=false)
|
||||||
private File DEBUG_FILE = null;
|
private File DEBUG_FILE = null;
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Private Member Variables
|
// Private Member Variables
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
|
@ -131,13 +130,12 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
private Set<String> inputNames = new HashSet<String>();
|
private Set<String> inputNames = new HashSet<String>();
|
||||||
private NestedHashMap priorCache = new NestedHashMap();
|
private NestedHashMap priorCache = new NestedHashMap();
|
||||||
private boolean trustACField = false;
|
private boolean trustACField = false;
|
||||||
private double maxQualObserved = 0.0;
|
|
||||||
private PrintStream tranchesStream = null;
|
private PrintStream tranchesStream = null;
|
||||||
|
|
||||||
private static double round2(double num) {
|
private static double round4(double num) {
|
||||||
double result = num * 100.0;
|
double result = num * 10000.0;
|
||||||
result = Math.round(result);
|
result = Math.round(result);
|
||||||
result = result / 100.0;
|
result = result / 10000.0;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -196,8 +194,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||||
final TreeSet<String> samples = new TreeSet<String>();
|
final TreeSet<String> samples = new TreeSet<String>();
|
||||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
|
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
|
||||||
hInfo.add(new VCFInfoHeaderLine("OQ", 1, VCFHeaderLineType.Float, "The original variant quality score"));
|
hInfo.add(new VCFInfoHeaderLine(VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "log10-scaled probability of variant being true under the trained gaussian mixture model"));
|
||||||
hInfo.add(new VCFInfoHeaderLine("LOD", 1, VCFHeaderLineType.Float, "The log odds ratio calculated by the VR algorithm which was turned into the phred scaled recalibrated quality score"));
|
|
||||||
samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));
|
samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));
|
||||||
|
|
||||||
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||||
|
|
@ -288,19 +285,20 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
priorCache.put( priorLodFactor, false, priorKey );
|
priorCache.put( priorLodFactor, false, priorKey );
|
||||||
}
|
}
|
||||||
|
|
||||||
final double pVar = theModel.evaluateVariant( ref.getGenomeLocParser(),vc );
|
final double log10pVar = theModel.evaluateVariant( ref.getGenomeLocParser(),vc );
|
||||||
final double lod = priorLodFactor + Math.log10(pVar);
|
final double lod = priorLodFactor + log10pVar;
|
||||||
variantDatum.pos = vc.getStart();
|
|
||||||
variantDatum.qual = round2(Math.abs( QUALITY_SCALE_FACTOR * QualityUtils.lodToPhredScaleErrorRate(lod)));
|
if ( lod == Double.NEGATIVE_INFINITY ) {
|
||||||
if( variantDatum.qual > maxQualObserved ) {
|
throw new ReviewedStingException("Negative infinity detected during summation at " + vc);
|
||||||
maxQualObserved = variantDatum.qual;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
variantDatum.pos = vc.getStart();
|
||||||
|
variantDatum.lod = round4(lod);
|
||||||
|
|
||||||
mapList.add( variantDatum );
|
mapList.add( variantDatum );
|
||||||
final Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
final Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
||||||
attrs.put("OQ", String.format("%.2f", vc.getPhredScaledQual()));
|
attrs.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod));
|
||||||
attrs.put("LOD", String.format("%.4f", lod));
|
VariantContext newVC = VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), new HashSet<String>(), attrs);
|
||||||
VariantContext newVC = VariantContext.modifyPErrorFiltersAndAttributes(vc, variantDatum.qual / 10.0, new HashSet<String>(), attrs);
|
|
||||||
|
|
||||||
vcfWriter.add( newVC, ref.getBase() );
|
vcfWriter.add( newVC, ref.getBase() );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -233,7 +233,7 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
// the overall lod
|
// the overall lod
|
||||||
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||||
double overallLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
|
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||||
if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||||
|
|
||||||
// the forward lod
|
// the forward lod
|
||||||
|
|
@ -243,7 +243,7 @@ public class UnifiedGenotyperEngine {
|
||||||
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get());
|
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get());
|
||||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||||
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||||
double forwardLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
|
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||||
if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||||
|
|
||||||
// the reverse lod
|
// the reverse lod
|
||||||
|
|
@ -253,7 +253,7 @@ public class UnifiedGenotyperEngine {
|
||||||
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get());
|
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get());
|
||||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||||
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||||
double reverseLog10PofF = MathUtils.log10sum(log10AlleleFrequencyPosteriors.get(), 1);
|
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
|
||||||
if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||||
|
|
||||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,7 @@ public class MathUtils {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static double log10sum(double[] log10p, int start) {
|
public static double log10sumLog10(double[] log10p, int start) {
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
|
|
||||||
double maxValue = Utils.findMaxEntry(log10p);
|
double maxValue = Utils.findMaxEntry(log10p);
|
||||||
|
|
@ -76,9 +76,14 @@ public class MathUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static double sumLog10(double[] log10values) {
|
public static double sumLog10(double[] log10values) {
|
||||||
double s = 0.0;
|
return Math.pow(10.0, log10sumLog10(log10values));
|
||||||
for ( double v : log10values) s += Math.pow(10.0, v);
|
// double s = 0.0;
|
||||||
return s;
|
// for ( double v : log10values) s += Math.pow(10.0, v);
|
||||||
|
// return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static double log10sumLog10(double[] log10values) {
|
||||||
|
return log10sumLog10(log10values, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean wellFormedDouble(double val) {
|
public static boolean wellFormedDouble(double val) {
|
||||||
|
|
|
||||||
|
|
@ -652,19 +652,6 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// todo -- reimplement efficiently
|
|
||||||
// todo -- why is this public?
|
|
||||||
@Override
|
|
||||||
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
|
|
||||||
ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
|
|
||||||
int i = 0;
|
|
||||||
for ( PileupElement pile : this ) {
|
|
||||||
x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
|
|
||||||
}
|
|
||||||
|
|
||||||
return new IterableIterator<ExtendedPileupElement>(x.iterator());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simple useful routine to count the number of deletion bases in this pileup
|
* Simple useful routine to count the number of deletion bases in this pileup
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -1,29 +0,0 @@
|
||||||
package org.broadinstitute.sting.utils.pileup;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: depristo
|
|
||||||
* Date: Apr 14, 2009
|
|
||||||
* Time: 8:54:05 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class ExtendedPileupElement extends PileupElement {
|
|
||||||
private int pileupOffset = 0;
|
|
||||||
private ReadBackedPileup pileup = null;
|
|
||||||
|
|
||||||
public ExtendedPileupElement( SAMRecord read, int readOffset, int pileupOffset, ReadBackedPileup pileup ) {
|
|
||||||
super(read, readOffset);
|
|
||||||
this.pileupOffset = pileupOffset;
|
|
||||||
this.pileup = pileup;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getPileupOffset() {
|
|
||||||
return pileupOffset;
|
|
||||||
}
|
|
||||||
|
|
||||||
public ReadBackedPileup getPileup() {
|
|
||||||
return pileup;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -141,9 +141,6 @@ public interface ReadBackedPileup extends Iterable<PileupElement> {
|
||||||
*/
|
*/
|
||||||
public ReadBackedPileup getPileupForSample(String sampleName);
|
public ReadBackedPileup getPileupForSample(String sampleName);
|
||||||
|
|
||||||
// todo -- delete or make private
|
|
||||||
public IterableIterator<ExtendedPileupElement> extendedForeachIterator();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simple useful routine to count the number of deletion bases in this pileup
|
* Simple useful routine to count the number of deletion bases in this pileup
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -143,8 +143,8 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTranches() {
|
public void testTranches() {
|
||||||
String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -E TiTvVariantEvaluator -L chr1 -noStandard -reportType CSV -tf " + testDir + "tranches.4.txt";
|
String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -E TiTvVariantEvaluator -L chr1 -noStandard -reportType CSV -tf " + testDir + "tranches.6.txt";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("85b6621c64cc8f9a3b68cea644edf216"));
|
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("1002c7a71e6c14f87398d924ac1d92af"));
|
||||||
executeTestParallel("testTranches",spec);
|
executeTestParallel("testTranches",spec);
|
||||||
//executeTest("testACDiscordanceAtAC1EvalAC2Comp",spec);
|
//executeTest("testACDiscordanceAtAC1EvalAC2Comp",spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
||||||
VariantDatum[] variantData1 = new VariantDatum[N_VARIANTS];
|
VariantDatum[] variantData1 = new VariantDatum[N_VARIANTS];
|
||||||
|
|
||||||
private final File QUAL_DATA = new File(testDir + "tranches.raw.dat");
|
private final File QUAL_DATA = new File(testDir + "tranches.raw.dat");
|
||||||
private final double[] FDRS = new double[]{0.1, 1, 10, 100};
|
private final double[] FDRS = new double[]{0.1, 1, 2, 10};
|
||||||
private final double TARGET_TITV = 2.8;
|
private final double TARGET_TITV = 2.8;
|
||||||
private final File EXPECTED_TRANCHES_NEW = new File(testDir + "tranches.6.txt");
|
private final File EXPECTED_TRANCHES_NEW = new File(testDir + "tranches.6.txt");
|
||||||
private final File EXPECTED_TRANCHES_OLD = new File(testDir + "tranches.4.txt");
|
private final File EXPECTED_TRANCHES_OLD = new File(testDir + "tranches.4.txt");
|
||||||
|
|
@ -68,12 +68,13 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
||||||
List<VariantDatum> vd = new ArrayList<VariantDatum>();
|
List<VariantDatum> vd = new ArrayList<VariantDatum>();
|
||||||
try {
|
try {
|
||||||
for ( String line : new XReadLines(QUAL_DATA, true) ) {
|
for ( String line : new XReadLines(QUAL_DATA, true) ) {
|
||||||
String[] parts = line.split(" ");
|
String[] parts = line.split("\t");
|
||||||
|
// QUAL,TRANSITION,ID,LOD,FILTER
|
||||||
if ( ! parts[0].equals("QUAL") ) {
|
if ( ! parts[0].equals("QUAL") ) {
|
||||||
VariantDatum datum = new VariantDatum();
|
VariantDatum datum = new VariantDatum();
|
||||||
datum.qual = Double.valueOf(parts[0]);
|
datum.lod = Double.valueOf(parts[3]);
|
||||||
datum.isTransition = parts[1].equals("1");
|
datum.isTransition = parts[1].equals("1");
|
||||||
datum.isKnown = parts[2].equals("1");
|
datum.isKnown = ! parts[2].equals(".");
|
||||||
vd.add(datum);
|
vd.add(datum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -94,7 +95,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
||||||
read(EXPECTED_TRANCHES_NEW);
|
read(EXPECTED_TRANCHES_NEW);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test(expectedExceptions = {UserException.MalformedFile.class})
|
||||||
public final void readOldFormat() {
|
public final void readOldFormat() {
|
||||||
read(EXPECTED_TRANCHES_OLD);
|
read(EXPECTED_TRANCHES_OLD);
|
||||||
}
|
}
|
||||||
|
|
@ -103,13 +104,6 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
||||||
return Tranche.readTraches(f);
|
return Tranche.readTraches(f);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public final void testNewAndOldAretheSame() {
|
|
||||||
List<Tranche> newFormat = read(EXPECTED_TRANCHES_NEW);
|
|
||||||
List<Tranche> oldFormat = read(EXPECTED_TRANCHES_OLD);
|
|
||||||
assertTranchesAreTheSame(newFormat, oldFormat, false, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void assertTranchesAreTheSame(List<Tranche> newFormat, List<Tranche> oldFormat, boolean completeP, boolean includeName) {
|
private static void assertTranchesAreTheSame(List<Tranche> newFormat, List<Tranche> oldFormat, boolean completeP, boolean includeName) {
|
||||||
Assert.assertEquals(oldFormat.size(), newFormat.size());
|
Assert.assertEquals(oldFormat.size(), newFormat.size());
|
||||||
for ( int i = 0; i < newFormat.size(); i++ ) {
|
for ( int i = 0; i < newFormat.size(); i++ ) {
|
||||||
|
|
@ -136,15 +130,15 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
||||||
assertTranchesAreTheSame(read(EXPECTED_TRANCHES_NEW), tranches, true, false);
|
assertTranchesAreTheSame(read(EXPECTED_TRANCHES_NEW), tranches, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Test(expectedExceptions = {UserException.class})
|
@Test(expectedExceptions = {UserException.class})
|
||||||
// public final void testBadFDR() {
|
public final void testBadFDR() {
|
||||||
// List<VariantDatum> vd = readData();
|
List<VariantDatum> vd = readData();
|
||||||
// VariantGaussianMixtureModel.findTranches(vd.toArray(new VariantDatum[0]), new double[]{-1}, TARGET_TITV);
|
VariantGaussianMixtureModel.findTranches(vd.toArray(new VariantDatum[0]), new double[]{-1}, TARGET_TITV);
|
||||||
// }
|
}
|
||||||
//
|
|
||||||
// @Test(expectedExceptions = {UserException.class})
|
@Test(expectedExceptions = {UserException.class})
|
||||||
// public final void testBadTargetTiTv() {
|
public final void testBadTargetTiTv() {
|
||||||
// List<VariantDatum> vd = readData();
|
List<VariantDatum> vd = readData();
|
||||||
// VariantGaussianMixtureModel.findTranches(vd.toArray(new VariantDatum[0]), FDRS, 0.1);
|
VariantGaussianMixtureModel.findTranches(vd.toArray(new VariantDatum[0]), FDRS, 0.1);
|
||||||
// }
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,15 +30,15 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
VRTest yriTrio = new VRTest("yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf",
|
VRTest yriTrio = new VRTest("yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf",
|
||||||
"ab2629d67e378fd3aceb8318f0fbfe04", // in vcf
|
"ab2629d67e378fd3aceb8318f0fbfe04", // in vcf
|
||||||
"4dd95e9d8e5d21a6ab73de67d9663492", // tranches
|
"d8f0a92aa42989d332d3c65f61352978", // tranches
|
||||||
"4e893672230fca625f70b0491f3b36cb", // recalVCF
|
"3370e51f6e5035f2472c66762d13c821", // recalVCF
|
||||||
"371b0e2796982485ae050e46892f6660"); // cut VCF
|
"4801e6cfaeadc81c5c1f9bf5948e2a1c"); // cut VCF
|
||||||
|
|
||||||
VRTest lowPass = new VRTest("lowpass.N3.chr1.raw.vcf",
|
VRTest lowPass = new VRTest("lowpass.N3.chr1.raw.vcf",
|
||||||
"725489156426e4ddd8d623ab3d4b1023", // in vcf
|
"725489156426e4ddd8d623ab3d4b1023", // in vcf
|
||||||
"dfc07132f592a811d0d6c25a4cb67a09", // tranches
|
"3a7067247146f4a77bb4fd7bc36f94c4", // tranches
|
||||||
"d52e4f511c9c00f8c21dffea81c47103", // recalVCF
|
"9a525c6838bff695321fc7ac0e458f9c", // recalVCF
|
||||||
"39716b2f03b50e88855d7975dd1b9b3e"); // cut VCF
|
"41e0a16af150244454ee68948ced00fb"); // cut VCF
|
||||||
|
|
||||||
@DataProvider(name = "VRTest")
|
@DataProvider(name = "VRTest")
|
||||||
public Object[][] createData1() {
|
public Object[][] createData1() {
|
||||||
|
|
@ -64,7 +64,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
executeTest("testGenerateVariantClusters-"+params.inVCF, spec).getFirst();
|
executeTest("testGenerateVariantClusters-"+params.inVCF, spec).getFirst();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "VRTest", dependsOnMethods = {"testGenerateVariantClusters"}, enabled = true)
|
@Test(dataProvider = "VRTest", enabled = true)
|
||||||
public void testVariantRecalibrator(VRTest params) {
|
public void testVariantRecalibrator(VRTest params) {
|
||||||
//System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
|
//System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
|
@ -85,7 +85,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst();
|
executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "VRTest", dependsOnMethods = {"testVariantRecalibrator"}, enabled = true)
|
@Test(dataProvider = "VRTest", enabled = true)
|
||||||
public void testApplyVariantCuts(VRTest params) {
|
public void testApplyVariantCuts(VRTest params) {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-R " + b36KGReference +
|
"-R " + b36KGReference +
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
# comment 1
|
# Variant quality score tranches file
|
||||||
# comment 2
|
# Version number 2
|
||||||
FDRtranche,targetTiTv,numKnown,numNovel,knownTiTv,novelTiTv,pCut,filterName
|
FDRtranche,targetTiTv,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName
|
||||||
0.10,2.8000,15791,893,3.3086,2.8000,1.1800,FDRtranche0.00to0.10
|
0.10,2.80,15839,897,3.3052,2.8008,-3.8485,FDRtranche0.00to0.10
|
||||||
1.00,2.8000,15824,903,3.3047,2.7782,1.0600,FDRtranche0.10to1.00
|
1.00,2.80,15844,900,3.3043,2.7815,-3.8652,FDRtranche0.10to1.00
|
||||||
10.00,2.8000,16037,984,3.2937,2.5269,0.3800,FDRtranche1.00to10.00
|
2.00,2.80,15847,902,3.3051,2.7583,-3.8826,FDRtranche1.00to2.00
|
||||||
100.00,2.8000,16578,1602,3.2291,1.6091,0.0000,FDRtranche10.00to100.00
|
10.00,2.80,16153,1039,3.2744,2.5704,-5.1058,FDRtranche2.00to10.00
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue