Moving G's MyHapScore to replace the old HapScore
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3943 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7858ffec32
commit
8d8acc9fae
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010 The Broad Institute
|
* Copyright (c) 2010, The Broad Institute
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person
|
* Permission is hereby granted, free of charge, to any person
|
||||||
* obtaining a copy of this software and associated documentation
|
* obtaining a copy of this software and associated documentation
|
||||||
|
|
@ -12,15 +12,14 @@
|
||||||
*
|
*
|
||||||
* The above copyright notice and this permission notice shall be
|
* The above copyright notice and this permission notice shall be
|
||||||
* included in all copies or substantial portions of the Software.
|
* included in all copies or substantial portions of the Software.
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
@ -34,20 +33,16 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.*;
|
import org.broadinstitute.sting.utils.pileup.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
// todo -- rename to haplotype penalty
|
|
||||||
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||||
|
private final static String REGEXP_WILDCARD = ".";
|
||||||
// if true, we compute a second haplotype from the reads, instead of constraining ourselves to the reference
|
|
||||||
// as a haplotype itself
|
|
||||||
private final static boolean USE_NON_REFERENCE_SECOND_HAPLOTYPE = true;
|
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( !vc.isBiallelic() || !vc.isSNP() || stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
if ( !vc.isBiallelic() || !vc.isSNP() || stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
||||||
|
|
@ -58,13 +53,14 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
int contextWingSize = Math.min(((int)ref.getWindow().size() - 1)/2, MIN_CONTEXT_WING_SIZE);
|
int contextWingSize = Math.min(((int)ref.getWindow().size() - 1)/2, MIN_CONTEXT_WING_SIZE);
|
||||||
int contextSize = contextWingSize * 2 + 1;
|
int contextSize = contextWingSize * 2 + 1;
|
||||||
|
|
||||||
// calculate
|
// Compute all haplotypes consistent with the current read pileup
|
||||||
Haplotype refHaplotype = calcRefHaplotype(vc, ref, context, contextSize);
|
List<Haplotype> haplotypes = computeHaplotypes(context.getBasePileup(), contextSize);
|
||||||
Haplotype altHaplotype = new Haplotype(getPileupOfAllele(vc.getAlternateAllele(0), context.getBasePileup()), contextSize);
|
|
||||||
|
|
||||||
//System.exit(1);
|
|
||||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
||||||
double score = scoreReadsAgainstHaplotypes(Arrays.asList(refHaplotype, altHaplotype), context.getBasePileup(), contextSize);
|
double score = 0.0;
|
||||||
|
|
||||||
|
if (haplotypes != null)
|
||||||
|
score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize);
|
||||||
|
|
||||||
// return the score
|
// return the score
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
|
|
@ -72,37 +68,184 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo -- note that the refPileup.size() won't deal correctly with the situation where the current site is hom-var
|
private class HaplotypeComparator implements Comparator<Haplotype>{
|
||||||
// todo -- but there's nearby het size. In order to really handle this we need to group reads into two clusters,
|
|
||||||
// todo -- but if we are going to do this we might as well just assemble the whole region
|
public int compare(Haplotype a, Haplotype b) {
|
||||||
public Haplotype calcRefHaplotype(VariantContext vc, ReferenceContext ref, AlignmentContext context, int contextSize) {
|
if (a.getQualitySum() < b.getQualitySum())
|
||||||
ReadBackedPileup refPileup = getPileupOfAllele(vc.getReference(), context.getBasePileup());
|
return 1;
|
||||||
if ( USE_NON_REFERENCE_SECOND_HAPLOTYPE && refPileup.size() > 0 ) {
|
if (a.getQualitySum() > b.getQualitySum()){
|
||||||
// we are calculating the reference haplotype from the reads itself -- effectively allows us to
|
return -1;
|
||||||
// have het haplotypes that are hom-var in the surrounding context, indicating that the individual
|
}
|
||||||
// as two alt haplotypes
|
return 0;
|
||||||
return new Haplotype(refPileup, contextSize);
|
|
||||||
} else {
|
|
||||||
// we are constraining the reference haplotype to really be the reference itself
|
|
||||||
int contextWingSize = (contextSize - 1) / 2;
|
|
||||||
int refMiddle = (int)(ref.getWindow().size() - 1) / 2;
|
|
||||||
int refStart = refMiddle - contextWingSize;
|
|
||||||
int refStop = refMiddle + contextWingSize + 1;
|
|
||||||
String refString = new String(ref.getBases()).substring(refStart, refStop);
|
|
||||||
return new Haplotype(refString.getBytes(), 60);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private List<Haplotype> computeHaplotypes(ReadBackedPileup pileup, int contextSize) {
|
||||||
|
// Compute all possible haplotypes consistent with current pileup
|
||||||
|
ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
|
||||||
|
PriorityQueue<Haplotype> haplotypeQueue = new PriorityQueue<Haplotype>(100, new HaplotypeComparator());
|
||||||
|
|
||||||
|
|
||||||
|
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
||||||
|
if (ReadUtils.is454Read(p.getRead()))
|
||||||
|
continue;
|
||||||
|
Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize);
|
||||||
|
|
||||||
|
|
||||||
|
haplotypeQueue.add(haplotypeFromRead);
|
||||||
|
//haplotypeList.add(haplotypeFromRead);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now that priority queue has been built with all reads at context, we need to merge and find possible segregating haplotypes
|
||||||
|
Haplotype elem;
|
||||||
|
while ((elem = haplotypeQueue.poll()) != null) {
|
||||||
|
//System.out.print("element: "+elem.toString());
|
||||||
|
//System.out.format(" SumQual = %f\n", elem.getQualitySum());
|
||||||
|
boolean foundHaplotypeMatch = false;
|
||||||
|
//Haplotype[] remainingHaplotypes = haplotypeQueue.toArray(new Haplotype[haplotypeQueue.size()]);
|
||||||
|
for ( Haplotype haplotypeFromList : haplotypeList ) {
|
||||||
|
|
||||||
|
Haplotype consensusHaplotype = getConsensusHaplotype(elem, haplotypeFromList);
|
||||||
|
//System.out.format("-Checking consensus for %s:", haplotypeFromList.toString());
|
||||||
|
if (consensusHaplotype != null) {
|
||||||
|
//System.out.format("--Consensus haplotype = %s, qual = %f\n", consensusHaplotype.toString(), consensusHaplotype.getQualitySum());
|
||||||
|
foundHaplotypeMatch = true;
|
||||||
|
if (consensusHaplotype.getQualitySum() > haplotypeFromList.getQualitySum()) {
|
||||||
|
haplotypeList.remove(haplotypeFromList);
|
||||||
|
haplotypeList.add(consensusHaplotype);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* else {
|
||||||
|
System.out.println("no consensus found");
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!foundHaplotypeMatch) {
|
||||||
|
haplotypeList.add(elem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Now retrieve two most popular haplotypes
|
||||||
|
// TODO - quick and dirty solution, could use better data structures to do this automatically
|
||||||
|
int bestIdx=0, secondBestIdx=0;
|
||||||
|
double bestIdxVal=-1.0, secondBestIdxVal = -1.0;
|
||||||
|
|
||||||
|
for (int k=0; k < haplotypeList.size(); k++) {
|
||||||
|
|
||||||
|
double qualSum = haplotypeList.get(k).getQualitySum();
|
||||||
|
if (qualSum >= bestIdxVal) {
|
||||||
|
secondBestIdx = bestIdx;
|
||||||
|
secondBestIdxVal = bestIdxVal;
|
||||||
|
bestIdx = k;
|
||||||
|
bestIdxVal = qualSum;
|
||||||
|
}
|
||||||
|
else if (qualSum >= secondBestIdxVal) {
|
||||||
|
// check if current is second best
|
||||||
|
secondBestIdx = k;
|
||||||
|
secondBestIdxVal = qualSum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (haplotypeList.size() > 0) {
|
||||||
|
Haplotype haplotypeR = haplotypeList.get(bestIdx);
|
||||||
|
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
|
||||||
|
|
||||||
|
// Temp hack to match old implementation's scaling, TBD better behavior
|
||||||
|
|
||||||
|
return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Haplotype getHaplotypeFromRead(ExtendedPileupElement p, int contextSize) {
|
||||||
|
SAMRecord read = p.getRead();
|
||||||
|
int readOffsetFromPileup = p.getOffset();
|
||||||
|
int baseOffsetStart = readOffsetFromPileup - (contextSize - 1)/2;
|
||||||
|
byte[] haplotypeBases = new byte[contextSize];
|
||||||
|
|
||||||
|
for(int i=0; i < contextSize; i++) {
|
||||||
|
haplotypeBases[i] = REGEXP_WILDCARD.getBytes()[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
double[] baseQualities = new double[contextSize];
|
||||||
|
Arrays.fill(baseQualities,0.0);
|
||||||
|
|
||||||
|
for (int i = 0; i < contextSize; i++ ) {
|
||||||
|
int baseOffset = i + baseOffsetStart;
|
||||||
|
if ( baseOffset < 0 )
|
||||||
|
continue;
|
||||||
|
if ( baseOffset >= read.getReadLength() )
|
||||||
|
break;
|
||||||
|
|
||||||
|
haplotypeBases[i] = read.getReadBases()[baseOffset];
|
||||||
|
baseQualities[i] = (double)read.getBaseQualities()[baseOffset];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return new Haplotype(haplotypeBases, baseQualities);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private Haplotype getConsensusHaplotype(Haplotype haplotypeA, Haplotype haplotypeB) {
|
||||||
|
String a = haplotypeA.toString();
|
||||||
|
String b = haplotypeB.toString();
|
||||||
|
|
||||||
|
if (a.length() != b.length())
|
||||||
|
throw new StingException("Haplotypes a and b must be of same length");
|
||||||
|
|
||||||
|
char chA, chB;
|
||||||
|
char wc = REGEXP_WILDCARD.charAt(0);
|
||||||
|
|
||||||
|
|
||||||
|
char[] consensusChars = new char[a.length()];
|
||||||
|
double[] consensusQuals = new double[a.length()];
|
||||||
|
|
||||||
|
for (int i=0; i < a.length(); i++) {
|
||||||
|
chA = a.charAt(i);
|
||||||
|
chB = b.charAt(i);
|
||||||
|
|
||||||
|
if ((chA != chB) && (chA != wc) && (chB != wc))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
if ((chA == wc) && (chB == wc)) {
|
||||||
|
consensusChars[i] = wc;
|
||||||
|
consensusQuals[i] = 0.0;
|
||||||
|
}
|
||||||
|
else if ((chA == wc)) {
|
||||||
|
consensusChars[i] = chB;
|
||||||
|
consensusQuals[i] = haplotypeB.quals[i];
|
||||||
|
}
|
||||||
|
else if ((chB == wc)){
|
||||||
|
consensusChars[i] = chA;
|
||||||
|
consensusQuals[i] = haplotypeA.quals[i];
|
||||||
|
} else {
|
||||||
|
consensusChars[i] = chA;
|
||||||
|
consensusQuals[i] = haplotypeA.quals[i]+haplotypeB.quals[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return new Haplotype(new String(consensusChars), consensusQuals);
|
||||||
|
}
|
||||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
||||||
private double scoreReadsAgainstHaplotypes(List<Haplotype> haplotypes, ReadBackedPileup pileup, int contextSize) {
|
private double scoreReadsAgainstHaplotypes(List<Haplotype> haplotypes, ReadBackedPileup pileup, int contextSize) {
|
||||||
if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(0));
|
// if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(0));
|
||||||
if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(1));
|
// if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(1));
|
||||||
|
|
||||||
double[][] haplotypeScores = new double[pileup.size()][haplotypes.size()];
|
double[][] haplotypeScores = new double[pileup.size()][haplotypes.size()];
|
||||||
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
||||||
SAMRecord read = p.getRead();
|
SAMRecord read = p.getRead();
|
||||||
int readOffsetFromPileup = p.getOffset();
|
int readOffsetFromPileup = p.getOffset();
|
||||||
|
|
||||||
|
if (ReadUtils.is454Read(read))
|
||||||
|
continue;
|
||||||
|
|
||||||
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
|
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
|
||||||
double m = 10000000;
|
double m = 10000000;
|
||||||
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
||||||
|
|
@ -172,112 +315,38 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
|
|
||||||
private class Haplotype {
|
private class Haplotype {
|
||||||
byte[] bases = null;
|
byte[] bases = null;
|
||||||
byte[] quals = null;
|
double[] quals = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
||||||
*
|
*
|
||||||
* @param bases
|
* @param bases bases
|
||||||
* @param qual
|
* @param qual qual
|
||||||
*/
|
*/
|
||||||
Haplotype(byte[] bases, int qual) {
|
Haplotype(byte[] bases, int qual) {
|
||||||
this.bases = bases;
|
this.bases = bases;
|
||||||
quals = new byte[bases.length];
|
quals = new double[bases.length];
|
||||||
Arrays.fill(quals, (byte)qual);
|
Arrays.fill(quals, (double)qual);
|
||||||
}
|
}
|
||||||
|
|
||||||
Haplotype(ReadBackedPileup pileup, int contextSize ) {
|
Haplotype(byte[] bases, double[] quals) {
|
||||||
this.bases = new byte[contextSize];
|
this.bases = bases;
|
||||||
this.quals = new byte[contextSize];
|
this.quals = quals;
|
||||||
calculateConsensusOverWindow(pileup, contextSize, (contextSize - 1) / 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void calculateConsensusOverWindow(ReadBackedPileup pileup, int contextSize, int pileupOffset) {
|
Haplotype(String bases, double[] quals) {
|
||||||
// for each context position
|
this.bases = bases.getBytes();
|
||||||
for ( int i = 0; i < contextSize; i++ ) {
|
this.quals = quals;
|
||||||
int offsetFromPileup = i - pileupOffset;
|
|
||||||
ReadBackedPileup offsetPileup = pileupAtOffset(pileup, offsetFromPileup);
|
|
||||||
if ( DEBUG ) System.out.printf("pileup is %s%n", offsetPileup);
|
|
||||||
BaseQual bq = calcConsensusAtLocus(offsetPileup, FLAT_BASE_PRIORS);
|
|
||||||
this.bases[i] = bq.getBase();
|
|
||||||
this.quals[i] = bq.getQual();
|
|
||||||
if ( DEBUG ) System.out.printf(" At %d: offset %d bq = %c / %d%n", i, offsetFromPileup, (char)bq.getBase(), bq.getQual());
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private BaseQual calcConsensusAtLocus( ReadBackedPileup pileup, double[] log10priors ) {
|
|
||||||
double[] log10BaseLikelihoods = new double[BaseUtils.Base.values().length];
|
|
||||||
|
|
||||||
// loop over a, c, g, t and determine the most likely hypothesis
|
|
||||||
for ( BaseUtils.Base base : BaseUtils.Base.values() ) {
|
|
||||||
double log10L = log10priors[base.getIndex()];
|
|
||||||
|
|
||||||
for ( PileupElement p : pileup ) {
|
|
||||||
byte qual = p.getQual();
|
|
||||||
if ( qual > 5 ) {
|
|
||||||
double baseP = QualityUtils.qualToProb(qual);
|
|
||||||
double L = base.sameBase(p.getBase()) ? baseP : 1 - baseP;
|
|
||||||
if ( Double.isInfinite(Math.log10(L)) )
|
|
||||||
throw new StingException("BUG -- base likelihood is infinity!");
|
|
||||||
log10L += Math.log10(L);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log10BaseLikelihoods[base.getIndex()] = log10L;
|
|
||||||
}
|
|
||||||
|
|
||||||
double[] posteriors = MathUtils.normalizeFromLog10(log10BaseLikelihoods, false);
|
|
||||||
int mostLikelyIndex = MathUtils.maxElementIndex(posteriors);
|
|
||||||
byte mostLikelyBase = BaseUtils.Base.values()[mostLikelyIndex].getBase(); // get the most likely option
|
|
||||||
double MAX_CONSENSUS_QUALITY = 0.000001;
|
|
||||||
byte qual = QualityUtils.probToQual(posteriors[mostLikelyIndex],MAX_CONSENSUS_QUALITY); // call posterior calculator here over L over bases
|
|
||||||
return new BaseQual(mostLikelyBase, qual);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() { return new String(this.bases); }
|
public String toString() { return new String(this.bases); }
|
||||||
|
|
||||||
|
double getQualitySum() {
|
||||||
|
double s = 0;
|
||||||
|
for (int k=0; k < bases.length; k++) {
|
||||||
|
s += quals[k];
|
||||||
}
|
}
|
||||||
|
return s;
|
||||||
private static class BaseQual extends Pair<Byte, Byte> {
|
|
||||||
public BaseQual(byte base, byte qual) {
|
|
||||||
super(base, qual);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte getBase() { return getFirst(); }
|
|
||||||
public byte getQual() { return getSecond(); }
|
|
||||||
}
|
|
||||||
|
|
||||||
private static ReadBackedPileup pileupAtOffset(ReadBackedPileup pileup, int offsetFromPileup) {
|
|
||||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
|
||||||
ArrayList<Integer> offsets = new ArrayList<Integer>();
|
|
||||||
|
|
||||||
// go through the pileup, read by read, collecting up the context at offset
|
|
||||||
for ( PileupElement p : pileup ) {
|
|
||||||
// not safe -- doesn't work for indel-containing reads!
|
|
||||||
|
|
||||||
// whole algorithm should be restructured to handle other reads in the window or use LocusIteratorByState
|
|
||||||
SAMRecord read = p.getRead();
|
|
||||||
int readOffsetInPileup = p.getOffset();
|
|
||||||
int neededReadOffset = readOffsetInPileup + offsetFromPileup;
|
|
||||||
if ( neededReadOffset >= 0 && neededReadOffset < read.getReadLength() ) {
|
|
||||||
reads.add(p.getRead());
|
|
||||||
offsets.add(neededReadOffset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new ReadBackedPileupImpl(pileup.getLocation(), reads, offsets);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static ReadBackedPileup getPileupOfAllele( Allele allele, ReadBackedPileup pileup ) {
|
|
||||||
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
|
||||||
byte alleleBase = allele.getBases()[0]; // assumes SNP
|
|
||||||
|
|
||||||
for ( PileupElement p : pileup ) {
|
|
||||||
if ( p.getBase() == alleleBase ) {
|
|
||||||
filteredPileup.add(p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new ReadBackedPileupImpl(pileup.getLocation(), filteredPileup);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getKeyNames() { return Arrays.asList("HaplotypeScore"); }
|
public List<String> getKeyNames() { return Arrays.asList("HaplotypeScore"); }
|
||||||
|
|
|
||||||
|
|
@ -1,357 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
|
||||||
|
|
||||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
|
||||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
|
||||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.*;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
|
||||||
|
|
||||||
public class MyHaplotypeScore implements InfoFieldAnnotation {
|
|
||||||
private final static boolean DEBUG = false;
|
|
||||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
|
||||||
private final static String REGEXP_WILDCARD = ".";
|
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
|
||||||
if ( !vc.isBiallelic() || !vc.isSNP() || stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
|
||||||
return null;
|
|
||||||
|
|
||||||
AlignmentContext context = StratifiedAlignmentContext.joinContexts(stratifiedContexts.values());
|
|
||||||
|
|
||||||
int contextWingSize = Math.min(((int)ref.getWindow().size() - 1)/2, MIN_CONTEXT_WING_SIZE);
|
|
||||||
int contextSize = contextWingSize * 2 + 1;
|
|
||||||
|
|
||||||
// Compute all haplotypes consistent with the current read pileup
|
|
||||||
List<Haplotype> haplotypes = computeHaplotypes(context.getBasePileup(), contextSize);
|
|
||||||
|
|
||||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
|
||||||
double score = 0.0;
|
|
||||||
|
|
||||||
if (haplotypes != null)
|
|
||||||
score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize);
|
|
||||||
|
|
||||||
// return the score
|
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
|
||||||
map.put(getKeyNames().get(0), String.format("%.2f", score));
|
|
||||||
return map;
|
|
||||||
}
|
|
||||||
|
|
||||||
private class HaplotypeComparator implements Comparator<Haplotype>{
|
|
||||||
|
|
||||||
public int compare(Haplotype a, Haplotype b) {
|
|
||||||
if (a.getQualitySum() < b.getQualitySum())
|
|
||||||
return 1;
|
|
||||||
if (a.getQualitySum() > b.getQualitySum()){
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private List<Haplotype> computeHaplotypes(ReadBackedPileup pileup, int contextSize) {
|
|
||||||
// Compute all possible haplotypes consistent with current pileup
|
|
||||||
ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
|
|
||||||
ArrayList<Integer> readsPerHaplotype = new ArrayList<Integer>();
|
|
||||||
PriorityQueue<Haplotype> haplotypeQueue = new PriorityQueue<Haplotype>(100, new HaplotypeComparator());
|
|
||||||
|
|
||||||
|
|
||||||
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
|
||||||
if (ReadUtils.is454Read(p.getRead()))
|
|
||||||
continue;
|
|
||||||
Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize);
|
|
||||||
|
|
||||||
|
|
||||||
haplotypeQueue.add(haplotypeFromRead);
|
|
||||||
//haplotypeList.add(haplotypeFromRead);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now that priority queue has been built with all reads at context, we need to merge and find possible segregating haplotypes
|
|
||||||
Haplotype elem;
|
|
||||||
while ((elem = haplotypeQueue.poll()) != null) {
|
|
||||||
//System.out.print("element: "+elem.toString());
|
|
||||||
//System.out.format(" SumQual = %f\n", elem.getQualitySum());
|
|
||||||
boolean foundHaplotypeMatch = false;
|
|
||||||
//Haplotype[] remainingHaplotypes = haplotypeQueue.toArray(new Haplotype[haplotypeQueue.size()]);
|
|
||||||
for ( Haplotype haplotypeFromList : haplotypeList ) {
|
|
||||||
|
|
||||||
Haplotype consensusHaplotype = getConsensusHaplotype(elem, haplotypeFromList);
|
|
||||||
//System.out.format("-Checking consensus for %s:", haplotypeFromList.toString());
|
|
||||||
if (consensusHaplotype != null) {
|
|
||||||
//System.out.format("--Consensus haplotype = %s, qual = %f\n", consensusHaplotype.toString(), consensusHaplotype.getQualitySum());
|
|
||||||
foundHaplotypeMatch = true;
|
|
||||||
if (consensusHaplotype.getQualitySum() > haplotypeFromList.getQualitySum()) {
|
|
||||||
haplotypeList.remove(haplotypeFromList);
|
|
||||||
haplotypeList.add(consensusHaplotype);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* else {
|
|
||||||
System.out.println("no consensus found");
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!foundHaplotypeMatch) {
|
|
||||||
haplotypeList.add(elem);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Now retrieve two most popular haplotypes
|
|
||||||
// TODO - quick and dirty solution, could use better data structures to do this automatically
|
|
||||||
int bestIdx=0, secondBestIdx=0;
|
|
||||||
double bestIdxVal=-1.0, secondBestIdxVal = -1.0;
|
|
||||||
|
|
||||||
for (int k=0; k < haplotypeList.size(); k++) {
|
|
||||||
|
|
||||||
double qualSum = haplotypeList.get(k).getQualitySum();
|
|
||||||
if (qualSum >= bestIdxVal) {
|
|
||||||
secondBestIdx = bestIdx;
|
|
||||||
secondBestIdxVal = bestIdxVal;
|
|
||||||
bestIdx = k;
|
|
||||||
bestIdxVal = qualSum;
|
|
||||||
}
|
|
||||||
else if (qualSum >= secondBestIdxVal) {
|
|
||||||
// check if current is second best
|
|
||||||
secondBestIdx = k;
|
|
||||||
secondBestIdxVal = qualSum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (haplotypeList.size() > 0) {
|
|
||||||
Haplotype haplotypeR = haplotypeList.get(bestIdx);
|
|
||||||
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
|
|
||||||
|
|
||||||
// Temp hack to match old implementation's scaling, TBD better behavior
|
|
||||||
|
|
||||||
return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Haplotype getHaplotypeFromRead(ExtendedPileupElement p, int contextSize) {
|
|
||||||
SAMRecord read = p.getRead();
|
|
||||||
int readOffsetFromPileup = p.getOffset();
|
|
||||||
int baseOffsetStart = readOffsetFromPileup - (contextSize - 1)/2;
|
|
||||||
byte[] haplotypeBases = new byte[contextSize];
|
|
||||||
|
|
||||||
for(int i=0; i < contextSize; i++) {
|
|
||||||
haplotypeBases[i] = REGEXP_WILDCARD.getBytes()[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
double[] baseQualities = new double[contextSize];
|
|
||||||
Arrays.fill(baseQualities,0.0);
|
|
||||||
|
|
||||||
for (int i = 0; i < contextSize; i++ ) {
|
|
||||||
int baseOffset = i + baseOffsetStart;
|
|
||||||
if ( baseOffset < 0 )
|
|
||||||
continue;
|
|
||||||
if ( baseOffset >= read.getReadLength() )
|
|
||||||
break;
|
|
||||||
|
|
||||||
haplotypeBases[i] = read.getReadBases()[baseOffset];
|
|
||||||
baseQualities[i] = (double)read.getBaseQualities()[baseOffset];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return new Haplotype(haplotypeBases, baseQualities);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private Haplotype getConsensusHaplotype(Haplotype haplotypeA, Haplotype haplotypeB) {
|
|
||||||
String a = haplotypeA.toString();
|
|
||||||
String b = haplotypeB.toString();
|
|
||||||
|
|
||||||
if (a.length() != b.length())
|
|
||||||
throw new StingException("Haplotypes a and b must be of same length");
|
|
||||||
|
|
||||||
char chA, chB;
|
|
||||||
char wc = REGEXP_WILDCARD.charAt(0);
|
|
||||||
|
|
||||||
|
|
||||||
char[] consensusChars = new char[a.length()];
|
|
||||||
double[] consensusQuals = new double[a.length()];
|
|
||||||
|
|
||||||
for (int i=0; i < a.length(); i++) {
|
|
||||||
chA = a.charAt(i);
|
|
||||||
chB = b.charAt(i);
|
|
||||||
|
|
||||||
if ((chA != chB) && (chA != wc) && (chB != wc))
|
|
||||||
return null;
|
|
||||||
|
|
||||||
if ((chA == wc) && (chB == wc)) {
|
|
||||||
consensusChars[i] = wc;
|
|
||||||
consensusQuals[i] = 0.0;
|
|
||||||
}
|
|
||||||
else if ((chA == wc) && (chB != wc)) {
|
|
||||||
consensusChars[i] = chB;
|
|
||||||
consensusQuals[i] = haplotypeB.quals[i];
|
|
||||||
}
|
|
||||||
else if ((chA != wc) && (chB == wc)){
|
|
||||||
consensusChars[i] = chA;
|
|
||||||
consensusQuals[i] = haplotypeA.quals[i];
|
|
||||||
} else {
|
|
||||||
consensusChars[i] = chA;
|
|
||||||
consensusQuals[i] = haplotypeA.quals[i]+haplotypeB.quals[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return new Haplotype(new String(consensusChars), consensusQuals);
|
|
||||||
}
|
|
||||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
|
||||||
private double scoreReadsAgainstHaplotypes(List<Haplotype> haplotypes, ReadBackedPileup pileup, int contextSize) {
|
|
||||||
// if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(0));
|
|
||||||
// if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(1));
|
|
||||||
|
|
||||||
double[][] haplotypeScores = new double[pileup.size()][haplotypes.size()];
|
|
||||||
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
|
||||||
SAMRecord read = p.getRead();
|
|
||||||
int readOffsetFromPileup = p.getOffset();
|
|
||||||
|
|
||||||
if (ReadUtils.is454Read(read))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
|
|
||||||
double m = 10000000;
|
|
||||||
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
|
||||||
Haplotype haplotype = haplotypes.get(i);
|
|
||||||
int start = readOffsetFromPileup - (contextSize - 1)/2;
|
|
||||||
double score = scoreReadAgainstHaplotype(read, start, contextSize, haplotype);
|
|
||||||
haplotypeScores[p.getPileupOffset()][i] = score;
|
|
||||||
if ( DEBUG ) System.out.printf(" vs. haplotype %d = %f%n", i, score);
|
|
||||||
m = Math.min(score, m);
|
|
||||||
}
|
|
||||||
if ( DEBUG ) System.out.printf(" => best score was %f%n", m);
|
|
||||||
}
|
|
||||||
|
|
||||||
double overallScore = 0.0;
|
|
||||||
for ( double[] readHaplotypeScores : haplotypeScores ) {
|
|
||||||
overallScore += MathUtils.arrayMin(readHaplotypeScores);
|
|
||||||
}
|
|
||||||
|
|
||||||
return overallScore;
|
|
||||||
}
|
|
||||||
|
|
||||||
private double scoreReadAgainstHaplotype(SAMRecord read, int baseOffsetStart, int contextSize, Haplotype haplotype ) {
|
|
||||||
double expected = 0.0;
|
|
||||||
double mismatches = 0.0;
|
|
||||||
|
|
||||||
// What's the expected mismatch rate under the model that this read is actually sampled from
|
|
||||||
// this haplotype? Let's assume the consensus base c is a random choice one of A, C, G, or T, and that
|
|
||||||
// the observed base is actually from a c with an error rate e. Since e is the rate at which we'd
|
|
||||||
// see a miscalled c, the expected mismatch rate is really e. So the expected number of mismatches
|
|
||||||
// is just sum_i e_i for i from 1..n for n sites
|
|
||||||
//
|
|
||||||
// Now, what's the probabilistic sum of mismatches? Suppose that the base b is equal to c. Well, it could
|
|
||||||
// actually be a miscall in a matching direction, which would happen at a e / 3 rate. If b != c, then
|
|
||||||
// the chance that it is actually a mismatch is 1 - e, since any of the other 3 options would be a mismatch.
|
|
||||||
// so the probability-weighted mismatch rate is sum_i ( matched ? e_i / 3 : 1 - e_i ) for i = 1 ... n
|
|
||||||
for ( int i = 0; i < contextSize; i++ ) {
|
|
||||||
int baseOffset = i + baseOffsetStart;
|
|
||||||
if ( baseOffset < 0 )
|
|
||||||
continue;
|
|
||||||
if ( baseOffset >= read.getReadLength() )
|
|
||||||
break;
|
|
||||||
|
|
||||||
byte haplotypeBase = haplotype.bases[i];
|
|
||||||
byte readBase = read.getReadBases()[baseOffset];
|
|
||||||
|
|
||||||
boolean matched = BaseUtils.basesAreEqual(readBase, haplotypeBase );
|
|
||||||
double e = QualityUtils.qualToErrorProb(read.getBaseQualities()[baseOffset]);
|
|
||||||
expected += e;
|
|
||||||
mismatches += matched ? e : 1 - e / 3;
|
|
||||||
|
|
||||||
// a more sophisticated calculation would include the reference quality, but it's nice to actually penalize
|
|
||||||
// the mismatching of poorly determined regions of the consensus
|
|
||||||
|
|
||||||
if ( DEBUG ) System.out.printf("Read %s: scoring %c vs. %c => e = %f Q%d esum %f vs. msum %f%n",
|
|
||||||
read.getReadName(), (char)haplotypeBase, (char)readBase, e, read.getBaseQualities()[baseOffset], expected, mismatches);
|
|
||||||
}
|
|
||||||
|
|
||||||
return mismatches - expected;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Log10 of a uniform prior over the base alphabet: every entry is
// log10(1 / |BaseUtils.Base|), filled once at class-load time.
private static final double[] FLAT_BASE_PRIORS = new double[BaseUtils.Base.values().length];
static {
    final int nBases = BaseUtils.Base.values().length;
    final double logUniform = Math.log10(1.0 / nBases);
    for ( int b = 0; b < nBases; b++ )
        FLAT_BASE_PRIORS[b] = logUniform;
}
|
|
||||||
|
|
||||||
private class Haplotype {
|
|
||||||
byte[] bases = null;
|
|
||||||
double[] quals = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
|
||||||
*
|
|
||||||
* @param bases
|
|
||||||
* @param qual
|
|
||||||
*/
|
|
||||||
Haplotype(byte[] bases, int qual) {
|
|
||||||
this.bases = bases;
|
|
||||||
quals = new double[bases.length];
|
|
||||||
Arrays.fill(quals, (double)qual);
|
|
||||||
}
|
|
||||||
|
|
||||||
Haplotype(byte[] bases, double[] quals) {
|
|
||||||
this.bases = bases;
|
|
||||||
this.quals = quals;
|
|
||||||
}
|
|
||||||
|
|
||||||
Haplotype(String bases, double[] quals) {
|
|
||||||
this.bases = bases.getBytes();
|
|
||||||
this.quals = quals;
|
|
||||||
}
|
|
||||||
public String toString() { return new String(this.bases); }
|
|
||||||
|
|
||||||
double getQualitySum() {
|
|
||||||
double s = 0;
|
|
||||||
for (int k=0; k < bases.length; k++) {
|
|
||||||
s += quals[k];
|
|
||||||
}
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getKeyNames() { return Arrays.asList("MyHaplotypeScore"); }
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MyHaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with two (and only two) segregating haplotypes")); }
|
|
||||||
}
|
|
||||||
|
|
@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testHasAnnotsAsking1() {
|
public void testHasAnnotsAsking1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("184fc1f99dfba3e2519d16458d728fcc"));
|
Arrays.asList("674541eb78fa6e4f9bee172b3f34bbab"));
|
||||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testHasAnnotsAsking2() {
|
public void testHasAnnotsAsking2() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||||
Arrays.asList("12a069d5f1c9cd3970ea6301397219aa"));
|
Arrays.asList("985c55e3f0c41082dc56f7a291ef307a"));
|
||||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testNoAnnotsAsking1() {
|
public void testNoAnnotsAsking1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("8f42df642b329ff19bc2c39470117280"));
|
Arrays.asList("c789e8f795cf4b182f717423bf3328f2"));
|
||||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testNoAnnotsAsking2() {
|
public void testNoAnnotsAsking2() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||||
Arrays.asList("908aa4b6ac65bee57f91bc6fed4d46ad"));
|
Arrays.asList("be18a3b9589ea60350fbaf8f7e1dd769"));
|
||||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultiSamplePilot1Joint() {
|
public void testMultiSamplePilot1Joint() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1,
|
||||||
Arrays.asList("3a402233264e21a84d421e3a4ea64768"));
|
Arrays.asList("0c1a5f6372f6d17544a95292bd313c86"));
|
||||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultiSamplePilot2Joint() {
|
public void testMultiSamplePilot2Joint() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1,
|
||||||
Arrays.asList("79736b3e955a16b30f827b2786fc08b1"));
|
Arrays.asList("049c98bc6db7029995f8893f570dd9ad"));
|
||||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testSingleSamplePilot2Joint() {
|
public void testSingleSamplePilot2Joint() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("b8d93c6fcb4b17d454cdcbfc4b43f076"));
|
Arrays.asList("6c278b09727cd14523423a6beda627a5"));
|
||||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -63,7 +63,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParallelization() {
|
public void testParallelization() {
|
||||||
String md5 = "098802639cfab1b777c96d38376f118a";
|
String md5 = "c66c55363fb9e951569667e3ff6eb728";
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
|
||||||
|
|
@ -90,11 +90,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testParameter() {
|
public void testParameter() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( "-genotype", "b87f28a772eb75c8dad9062c6d039da5" );
|
e.put( "-genotype", "dfe1a220ee0966a20af4a2fca9e635fe" );
|
||||||
e.put( "-all_bases", "ec9de2044cd5f5901d6879b56f12993a" );
|
e.put( "-all_bases", "e7b766485dd37631b3fcf9f655588456" );
|
||||||
e.put( "--min_base_quality_score 26", "875c64a64fd402626e04c9540388c483" );
|
e.put( "--min_base_quality_score 26", "e4e318853e091e63166eaef648ec26ac" );
|
||||||
e.put( "--min_mapping_quality_score 26", "de7d90e425f8f08f219dc91a25d60c68" );
|
e.put( "--min_mapping_quality_score 26", "57fffb32a3a7a736f6c30ae08ef71c91" );
|
||||||
e.put( "--max_mismatches_in_40bp_window 5", "758e312b2a2e7c4d83f60174d43fac8a" );
|
e.put( "--max_mismatches_in_40bp_window 5", "4f7a5c22247234c4ee3ca2cfbbc16729" );
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
|
@ -108,12 +108,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testConfidence() {
|
public void testConfidence() {
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||||
Arrays.asList("4937bab94b0bae1aa61cdf3a06cb49e8"));
|
Arrays.asList("8b4ebcbf330340a437e572a73a99227b"));
|
||||||
executeTest("testConfidence1", spec1);
|
executeTest("testConfidence1", spec1);
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||||
Arrays.asList("f8b722dad5c4868a4bba246eef83f96d"));
|
Arrays.asList("0d8825a98a8918bed2c02ac44e15dde7"));
|
||||||
executeTest("testConfidence2", spec2);
|
executeTest("testConfidence2", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue