Have Haplotype extend the Allele class.
This way, we don't need to create a new Allele for every read/Haplotype pair to be placed in the PerReadAlleleLikelihoodMap (very inefficient). Also, now we can easily get the Haplotype associated with the best allele for a given read.
This commit is contained in:
parent
94800771e3
commit
d3baa4b8ca
|
|
@ -47,7 +47,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.sun.corba.se.impl.logging.UtilSystemException;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
|
|
@ -155,7 +154,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
protected StingSAMFileWriter bamWriter = null;
|
||||
private SAMFileHeader bamHeader = null;
|
||||
private long uniqueNameCounter = 1;
|
||||
private final String readGroupId = "ArtificialHaplotype";
|
||||
private final static String readGroupId = "ArtificialHaplotype";
|
||||
|
||||
/**
|
||||
* The PairHMM implementation to use for genotype likelihood calculations. The various implementations balance a tradeoff of accuracy and runtime.
|
||||
|
|
@ -338,20 +337,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
|
||||
genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS );
|
||||
|
||||
if ( bamWriter != null ) {
|
||||
// prepare the bam header
|
||||
bamHeader = new SAMFileHeader();
|
||||
bamHeader.setSequenceDictionary(getToolkit().getSAMFileHeader().getSequenceDictionary());
|
||||
final List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>(1);
|
||||
final SAMReadGroupRecord rg = new SAMReadGroupRecord(readGroupId);
|
||||
rg.setSample("HC");
|
||||
rg.setSequencingCenter("BI");
|
||||
readGroups.add(rg);
|
||||
bamHeader.setReadGroups(readGroups);
|
||||
bamHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
||||
bamWriter.writeHeader(bamHeader);
|
||||
bamWriter.setPresorted(true);
|
||||
}
|
||||
if ( bamWriter != null )
|
||||
setupBamWriter();
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -461,8 +448,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && activeAllelesToGenotype.isEmpty() ) { return 0; } // No alleles found in this region so nothing to do!
|
||||
|
||||
finalizeActiveRegion( activeRegion ); // merge overlapping fragments, clip adapter and low qual tails
|
||||
final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader)); // Create the reference haplotype which is the bases from the reference that make up the active region
|
||||
referenceHaplotype.setIsReference(true);
|
||||
final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true); // Create the reference haplotype which is the bases from the reference that make up the active region
|
||||
final byte[] fullReferenceWithPadding = activeRegion.getFullReference(referenceReader, REFERENCE_PADDING);
|
||||
//int PRUNE_FACTOR = Math.max(MIN_PRUNE_FACTOR, determinePruneFactorFromCoverage( activeRegion ));
|
||||
final ArrayList<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, getPaddedLoc(activeRegion), MIN_PRUNE_FACTOR, activeAllelesToGenotype );
|
||||
|
|
@ -498,22 +484,19 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
}
|
||||
|
||||
if ( bamWriter != null ) {
|
||||
// sort the haplotypes in coordinate order and then write them to the bam
|
||||
Collections.sort( haplotypes, new Haplotype.HaplotypePositionComparator() );
|
||||
final GenomeLoc paddedRefLoc = getPaddedLoc(activeRegion);
|
||||
for ( Haplotype haplotype : haplotypes ) {
|
||||
// TODO -- clean up this code
|
||||
final GATKSAMRecord record = new GATKSAMRecord(bamHeader);
|
||||
record.setReadBases(haplotype.getBases());
|
||||
record.setAlignmentStart(paddedRefLoc.getStart() + haplotype.getAlignmentStartHapwrtRef());
|
||||
record.setBaseQualities(Utils.dupBytes((byte) '!', haplotype.getBases().length));
|
||||
record.setCigar(haplotype.getCigar());
|
||||
record.setMappingQuality(bestHaplotypes.contains(haplotype) ? 60 : 0);
|
||||
record.setReadName("HC" + uniqueNameCounter++);
|
||||
record.setReadUnmappedFlag(false);
|
||||
record.setReferenceIndex(activeRegion.getReferenceLoc().getContigIndex());
|
||||
record.setAttribute(SAMTag.RG.toString(), readGroupId);
|
||||
record.setFlags(16);
|
||||
bamWriter.addAlignment(record);
|
||||
for ( Haplotype haplotype : haplotypes )
|
||||
writeHaplotype(haplotype, paddedRefLoc, bestHaplotypes.contains(haplotype));
|
||||
|
||||
// now, output the interesting reads for each sample
|
||||
for ( final PerReadAlleleLikelihoodMap readAlleleLikelihoodMap : stratifiedReadMap.values() ) {
|
||||
for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
|
||||
final Allele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue());
|
||||
if ( bestAllele != Allele.NO_CALL )
|
||||
writeReadAgainstHaplotype(entry.getKey(), (Haplotype)bestAllele);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -608,6 +591,46 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
}
|
||||
|
||||
return returnMap;
|
||||
}
|
||||
|
||||
private void setupBamWriter() {
|
||||
// prepare the bam header
|
||||
bamHeader = new SAMFileHeader();
|
||||
bamHeader.setSequenceDictionary(getToolkit().getSAMFileHeader().getSequenceDictionary());
|
||||
bamHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
||||
|
||||
// include the original read groups plus a new artificial one for the haplotypes
|
||||
final List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>(getToolkit().getSAMFileHeader().getReadGroups());
|
||||
final SAMReadGroupRecord rg = new SAMReadGroupRecord(readGroupId);
|
||||
rg.setSample("HC");
|
||||
rg.setSequencingCenter("BI");
|
||||
readGroups.add(rg);
|
||||
bamHeader.setReadGroups(readGroups);
|
||||
|
||||
bamWriter.writeHeader(bamHeader);
|
||||
bamWriter.setPresorted(true);
|
||||
}
|
||||
|
||||
private void writeHaplotype(final Haplotype haplotype, final GenomeLoc paddedRefLoc, final boolean isAmongBestHaplotypes) {
|
||||
final GATKSAMRecord record = new GATKSAMRecord(bamHeader);
|
||||
record.setReadBases(haplotype.getBases());
|
||||
record.setAlignmentStart(paddedRefLoc.getStart() + haplotype.getAlignmentStartHapwrtRef());
|
||||
record.setBaseQualities(Utils.dupBytes((byte) '!', haplotype.getBases().length));
|
||||
record.setCigar(haplotype.getCigar());
|
||||
record.setMappingQuality(isAmongBestHaplotypes ? 60 : 0);
|
||||
record.setReadName("HC" + uniqueNameCounter++);
|
||||
record.setReadUnmappedFlag(false);
|
||||
record.setReferenceIndex(paddedRefLoc.getContigIndex());
|
||||
record.setAttribute(SAMTag.RG.toString(), readGroupId);
|
||||
record.setFlags(16);
|
||||
bamWriter.addAlignment(record);
|
||||
}
|
||||
|
||||
private void writeReadAgainstHaplotype(final GATKSAMRecord read, final Haplotype haplotype) {
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -138,6 +138,7 @@ public class LikelihoodCalculationEngine {
|
|||
readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
|
||||
//readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
|
||||
//readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
|
||||
// TODO -- why is Q18 hard-coded here???
|
||||
readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
|
||||
}
|
||||
|
||||
|
|
@ -151,7 +152,7 @@ public class LikelihoodCalculationEngine {
|
|||
final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : computeFirstDifferingPosition(haplotype.getBases(), previousHaplotypeSeen.getBases()) );
|
||||
previousHaplotypeSeen = haplotype;
|
||||
|
||||
perReadAlleleLikelihoodMap.add(read, Allele.create(haplotype.getBases()),
|
||||
perReadAlleleLikelihoodMap.add(read, haplotype,
|
||||
pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), read.getReadBases(),
|
||||
readQuals, readInsQuals, readDelQuals, overallGCP, haplotypeStart, jjj == 0));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,59 +37,71 @@ import org.broadinstitute.variant.variantcontext.VariantContext;
|
|||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
||||
public class Haplotype {
|
||||
protected final byte[] bases;
|
||||
public class Haplotype extends Allele {
|
||||
protected final double[] quals;
|
||||
private GenomeLoc genomeLocation = null;
|
||||
private HashMap<Integer, VariantContext> eventMap = null;
|
||||
private boolean isRef = false;
|
||||
private Cigar cigar;
|
||||
private int alignmentStartHapwrtRef;
|
||||
public int leftBreakPoint = 0;
|
||||
public int rightBreakPoint = 0;
|
||||
private Event artificialEvent = null;
|
||||
|
||||
/**
|
||||
* Main constructor
|
||||
*
|
||||
* @param bases bases
|
||||
* @param quals quals
|
||||
* @param isRef is reference allele?
|
||||
*/
|
||||
public Haplotype( final byte[] bases, final double[] quals, final boolean isRef ) {
|
||||
super(bases.clone(), isRef);
|
||||
this.quals = quals.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
||||
*
|
||||
* @param bases bases
|
||||
* @param qual qual
|
||||
*/
|
||||
public Haplotype( final byte[] bases, final int qual ) {
|
||||
this.bases = bases.clone();
|
||||
public Haplotype( final byte[] bases, final int qual, final boolean isRef ) {
|
||||
super(bases.clone(), isRef);
|
||||
quals = new double[bases.length];
|
||||
Arrays.fill(quals, (double)qual);
|
||||
}
|
||||
|
||||
public Haplotype( final byte[] bases, final int qual ) {
|
||||
this(bases, qual, false);
|
||||
}
|
||||
|
||||
public Haplotype( final byte[] bases, final boolean isRef ) {
|
||||
this(bases, 0, isRef);
|
||||
}
|
||||
|
||||
public Haplotype( final byte[] bases, final double[] quals ) {
|
||||
this.bases = bases.clone();
|
||||
this.quals = quals.clone();
|
||||
this(bases, quals, false);
|
||||
}
|
||||
|
||||
public Haplotype( final byte[] bases ) {
|
||||
this(bases, 0);
|
||||
this(bases, 0, false);
|
||||
}
|
||||
|
||||
protected Haplotype( final byte[] bases, final Event artificialEvent ) {
|
||||
this(bases, 0);
|
||||
this(bases, 0, false);
|
||||
this.artificialEvent = artificialEvent;
|
||||
}
|
||||
|
||||
public Haplotype( final byte[] bases, final GenomeLoc loc ) {
|
||||
this(bases);
|
||||
this(bases, 0, false);
|
||||
this.genomeLocation = loc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object h ) {
|
||||
return h instanceof Haplotype && Arrays.equals(bases, ((Haplotype) h).bases);
|
||||
return h instanceof Haplotype && super.equals(h);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Arrays.hashCode(bases);
|
||||
}
|
||||
|
||||
public HashMap<Integer, VariantContext> getEventMap() {
|
||||
return eventMap;
|
||||
}
|
||||
|
|
@ -98,17 +110,9 @@ public class Haplotype {
|
|||
this.eventMap = eventMap;
|
||||
}
|
||||
|
||||
public boolean isReference() {
|
||||
return isRef;
|
||||
}
|
||||
|
||||
public void setIsReference( boolean isRef ) {
|
||||
this.isRef = isRef;
|
||||
}
|
||||
|
||||
public double getQualitySum() {
|
||||
double s = 0;
|
||||
for (int k=0; k < bases.length; k++) {
|
||||
for (int k=0; k < quals.length; k++) {
|
||||
s += quals[k];
|
||||
}
|
||||
return s;
|
||||
|
|
@ -116,14 +120,14 @@ public class Haplotype {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new String(bases);
|
||||
return getDisplayString();
|
||||
}
|
||||
|
||||
public double[] getQuals() {
|
||||
return quals.clone();
|
||||
}
|
||||
public byte[] getBases() {
|
||||
return bases.clone();
|
||||
return super.getBases().clone();
|
||||
}
|
||||
|
||||
public long getStartPosition() {
|
||||
|
|
@ -178,13 +182,13 @@ public class Haplotype {
|
|||
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation, final int genomicInsertLocation ) {
|
||||
// refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
|
||||
final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
|
||||
if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||
if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= getBases().length ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||
return null;
|
||||
}
|
||||
byte[] newHaplotypeBases = new byte[]{};
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(getBases(), 0, haplotypeInsertLocation)); // bases before the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(getBases(), haplotypeInsertLocation + refAllele.length(), getBases().length)); // bases after the variant
|
||||
return new Haplotype(newHaplotypeBases, new Event(refAllele, altAllele, genomicInsertLocation));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ public class Allele implements Comparable<Allele> {
|
|||
/** A generic static NO_CALL allele for use */
|
||||
|
||||
// no public way to create an allele
|
||||
private Allele(byte[] bases, boolean isRef) {
|
||||
protected Allele(byte[] bases, boolean isRef) {
|
||||
// null alleles are no longer allowed
|
||||
if ( wouldBeNullAllele(bases) ) {
|
||||
throw new IllegalArgumentException("Null alleles are not supported");
|
||||
|
|
@ -140,7 +140,7 @@ public class Allele implements Comparable<Allele> {
|
|||
throw new IllegalArgumentException("Unexpected base in allele bases \'" + new String(bases)+"\'");
|
||||
}
|
||||
|
||||
private Allele(String bases, boolean isRef) {
|
||||
protected Allele(String bases, boolean isRef) {
|
||||
this(bases.getBytes(), isRef);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue