There was no reason to be sharing the Haplotype infrastructure between the HaplotypeCaller and the HaplotypeScore annotation since they were really looking for different things.

Separated them out, adding efficiencies for the HaplotypeScore version.
This commit is contained in:
Eric Banks 2013-01-16 11:10:13 -05:00
parent 392b5cbcdf
commit 445735a4a5
3 changed files with 52 additions and 56 deletions

View File

@ -56,7 +56,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
@ -217,14 +216,14 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
final Haplotype haplotype1 = consensusHaplotypeQueue.poll();
List<Haplotype> hlist = new ArrayList<Haplotype>();
hlist.add(new Haplotype(haplotype1.getBases(), 60));
hlist.add(new Haplotype(haplotype1.getBases(), (byte)60));
for (int k = 1; k < haplotypesToCompute; k++) {
Haplotype haplotype2 = consensusHaplotypeQueue.poll();
if (haplotype2 == null) {
haplotype2 = haplotype1;
} // Sometimes only the reference haplotype can be found
hlist.add(new Haplotype(haplotype2.getBases(), 20));
hlist.add(new Haplotype(haplotype2.getBases(), (byte)20));
}
return hlist;
} else
@ -236,8 +235,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
final byte[] haplotypeBases = new byte[contextSize];
Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD);
final double[] baseQualities = new double[contextSize];
Arrays.fill(baseQualities, 0.0);
final byte[] baseQualities = new byte[contextSize];
Arrays.fill(baseQualities, (byte)0);
byte[] readBases = read.getReadBases();
readBases = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readBases); // Adjust the read bases based on the Cigar string
@ -267,7 +266,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
readQuals[baseOffset] = (byte) 0;
} // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them
haplotypeBases[i] = readBases[baseOffset];
baseQualities[i] = (double) readQuals[baseOffset];
baseQualities[i] = readQuals[baseOffset];
}
return new Haplotype(haplotypeBases, baseQualities);
@ -286,10 +285,10 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
final int length = a.length;
final byte[] consensusChars = new byte[length];
final double[] consensusQuals = new double[length];
final byte[] consensusQuals = new byte[length];
final double[] qualsA = haplotypeA.getQuals();
final double[] qualsB = haplotypeB.getQuals();
final byte[] qualsA = haplotypeA.getQuals();
final byte[] qualsB = haplotypeB.getQuals();
for (int i = 0; i < length; i++) {
chA = a[i];
@ -300,7 +299,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
if ((chA == wc) && (chB == wc)) {
consensusChars[i] = wc;
consensusQuals[i] = 0.0;
consensusQuals[i] = 0;
} else if ((chA == wc)) {
consensusChars[i] = chB;
consensusQuals[i] = qualsB[i];
@ -309,7 +308,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
consensusQuals[i] = qualsA[i];
} else {
consensusChars[i] = chA;
consensusQuals[i] = qualsA[i] + qualsB[i];
consensusQuals[i] = (byte)((int)qualsA[i] + (int)qualsB[i]);
}
}
@ -433,7 +432,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
}
public List<String> getKeyNames() {
return Arrays.asList("HaplotypeScore");
}
@ -441,4 +439,39 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes"));
}
private static class Haplotype {
private final byte[] bases;
private final byte[] quals;
private int qualitySum = -1;
public Haplotype( final byte[] bases, final byte[] quals ) {
this.bases = bases;
this.quals = quals;
}
public Haplotype( final byte[] bases, final byte qual ) {
this.bases = bases;
quals = new byte[bases.length];
Arrays.fill(quals, qual);
}
public double getQualitySum() {
if ( qualitySum == -1 ) {
qualitySum = 0;
for ( final byte qual : quals ) {
qualitySum += (int)qual;
}
}
return qualitySum;
}
public byte[] getQuals() {
return quals.clone();
}
public byte[] getBases() {
return bases.clone();
}
}
}

View File

@ -338,7 +338,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
final Haplotype h = new Haplotype( path.getBases( graph ) );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ) ) {
// for GGA mode, add the desired allele into the haplotype if it isn't already present

View File

@ -37,8 +37,8 @@ import org.broadinstitute.variant.variantcontext.VariantContext;
import java.io.Serializable;
import java.util.*;
public class Haplotype extends Allele {
protected final double[] quals;
public class Haplotype extends Allele {
private GenomeLoc genomeLocation = null;
private HashMap<Integer, VariantContext> eventMap = null;
private Cigar cigar;
@ -51,49 +51,23 @@ public class Haplotype extends Allele {
* Main constructor
*
* @param bases bases
* @param quals quals
* @param isRef is reference allele?
*/
public Haplotype( final byte[] bases, final double[] quals, final boolean isRef ) {
super(bases.clone(), isRef);
this.quals = quals.clone();
}
/**
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
*
* @param bases bases
* @param qual qual
*/
public Haplotype( final byte[] bases, final int qual, final boolean isRef ) {
super(bases.clone(), isRef);
quals = new double[bases.length];
Arrays.fill(quals, (double)qual);
}
public Haplotype( final byte[] bases, final int qual ) {
this(bases, qual, false);
}
public Haplotype( final byte[] bases, final boolean isRef ) {
this(bases, 0, isRef);
}
public Haplotype( final byte[] bases, final double[] quals ) {
this(bases, quals, false);
super(bases.clone(), isRef);
}
public Haplotype( final byte[] bases ) {
this(bases, 0, false);
this(bases, false);
}
protected Haplotype( final byte[] bases, final Event artificialEvent ) {
this(bases, 0, false);
this(bases, false);
this.artificialEvent = artificialEvent;
}
public Haplotype( final byte[] bases, final GenomeLoc loc ) {
this(bases, 0, false);
this(bases, false);
this.genomeLocation = loc;
}
@ -110,22 +84,11 @@ public class Haplotype extends Allele {
this.eventMap = eventMap;
}
public double getQualitySum() {
double s = 0;
for (int k=0; k < quals.length; k++) {
s += quals[k];
}
return s;
}
@Override
public String toString() {
return getDisplayString();
}
public double[] getQuals() {
return quals.clone();
}
public byte[] getBases() {
return super.getBases().clone();
}