Merged bug fix from Stable into Unstable

This commit is contained in:
Ryan Poplin 2012-08-22 14:40:34 -04:00
commit fe3069b278
6 changed files with 37 additions and 65 deletions

View File

@ -42,14 +42,12 @@ import java.util.*;
public class GenotypingEngine {
private final boolean DEBUG;
private final int MNP_LOOK_AHEAD;
private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("<UNASSEMBLED_EVENT>", false);
public GenotypingEngine( final boolean DEBUG, final int MNP_LOOK_AHEAD, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
public GenotypingEngine( final boolean DEBUG, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
this.DEBUG = DEBUG;
this.MNP_LOOK_AHEAD = MNP_LOOK_AHEAD;
this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
noCall.add(Allele.NO_CALL);
}
@ -120,7 +118,7 @@ public class GenotypingEngine {
System.out.println( "> Cigar = " + h.getCigar() );
}
// Walk along the alignment and turn any difference from the reference into an event
h.setEventMap( generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) );
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
startPosKeySet.addAll(h.getEventMap().keySet());
}
@ -203,7 +201,7 @@ public class GenotypingEngine {
if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); }
for( final Haplotype h : haplotypes ) {
// Walk along the alignment and turn any difference from the reference into an event
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) );
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
if( DEBUG ) {
System.out.println( h.toString() );
@ -521,11 +519,7 @@ public class GenotypingEngine {
return false;
}
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) {
return generateVCsFromAlignment(null, alignmentStartHapwrtRef, cigar, ref, alignment, refLoc, sourceNameToAdd, MNP_LOOK_AHEAD); // BUGBUG: needed for compatibility with HaplotypeResolver code
}
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) {
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) {
final HashMap<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
int refPos = alignmentStartHapwrtRef;
@ -543,7 +537,7 @@ public class GenotypingEngine {
if( BaseUtils.isRegularBase(refByte) ) {
insertionAlleles.add( Allele.create(refByte, true) );
}
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
if( (haplotype.leftBreakPoint != 0 || haplotype.rightBreakPoint != 0) && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
} else {
byte[] insertionBases = new byte[]{};
@ -590,39 +584,16 @@ public class GenotypingEngine {
case EQ:
case X:
{
int numSinceMismatch = -1;
int stopOfMismatch = -1;
int startOfMismatch = -1;
int refPosStartOfMismatch = -1;
for( int iii = 0; iii < elementLength; iii++ ) {
if( ref[refPos] != alignment[alignmentPos] && alignment[alignmentPos] != ((byte) 'N') ) {
// SNP or start of possible MNP
if( stopOfMismatch == -1 ) {
startOfMismatch = alignmentPos;
stopOfMismatch = alignmentPos;
numSinceMismatch = 0;
refPosStartOfMismatch = refPos;
} else {
stopOfMismatch = alignmentPos;
}
}
if( stopOfMismatch != -1) {
numSinceMismatch++;
}
if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) {
final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 );
final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 );
if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) {
final byte refByte = ref[refPos];
final byte altByte = alignment[alignmentPos];
if( refByte != altByte ) { // SNP!
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isRegularBase(altByte) ) {
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
snpAlleles.add( Allele.create( refBases, true ) );
snpAlleles.add( Allele.create( mismatchBases, false ) );
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
snpAlleles.add( Allele.create( refByte, true ) );
snpAlleles.add( Allele.create( altByte, false ) );
vcs.put(refLoc.getStart() + refPos, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos, refLoc.getStart() + refPos, snpAlleles).make());
}
numSinceMismatch = -1;
stopOfMismatch = -1;
startOfMismatch = -1;
refPosStartOfMismatch = -1;
}
refPos++;
alignmentPos++;

View File

@ -119,10 +119,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
protected String keepRG = null;
@Hidden
@Argument(fullName="mnpLookAhead", shortName="mnpLookAhead", doc = "The number of bases to combine together to form MNPs out of nearby consecutive SNPs on the same haplotype", required = false)
protected int MNP_LOOK_AHEAD = 0;
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
protected int MIN_PRUNE_FACTOR = 1;
@ -135,7 +131,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
@Advanced
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Gap continuation penalty for use in the Pair HMM", required = false)
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false)
protected int gcpHMM = 10;
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
@ -189,21 +185,21 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@ArgumentCollection
private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection();
// the calculation arguments
private UnifiedGenotyperEngine UG_engine = null;
private UnifiedGenotyperEngine UG_engine_simple_genotyper = null;
@Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false)
protected boolean DEBUG;
// the UG engines
private UnifiedGenotyperEngine UG_engine = null;
private UnifiedGenotyperEngine UG_engine_simple_genotyper = null;
// the assembly engine
LocalAssemblyEngine assemblyEngine = null;
private LocalAssemblyEngine assemblyEngine = null;
// the likelihoods engine
LikelihoodCalculationEngine likelihoodCalculationEngine = null;
private LikelihoodCalculationEngine likelihoodCalculationEngine = null;
// the genotyping engine
GenotypingEngine genotypingEngine = null;
private GenotypingEngine genotypingEngine = null;
// the annotation engine
private VariantAnnotatorEngine annotationEngine;
@ -289,7 +285,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false );
genotypingEngine = new GenotypingEngine( DEBUG, MNP_LOOK_AHEAD, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
genotypingEngine = new GenotypingEngine( DEBUG, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
}
//---------------------------------------------------------------------------------------------------------------

View File

@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
@ -337,8 +338,8 @@ public class HaplotypeResolver extends RodWalker<Integer, Integer> {
}
// order results by start position
final TreeMap<Integer, VariantContext> source1Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1, 0));
final TreeMap<Integer, VariantContext> source2Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2, 0));
final TreeMap<Integer, VariantContext> source1Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source1Haplotype), 0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1));
final TreeMap<Integer, VariantContext> source2Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source2Haplotype), 0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2));
if ( source1Map.size() == 0 || source2Map.size() == 0 ) {
// TODO -- handle errors appropriately
logger.debug("No source alleles; aborting at " + refContext.getLocus());

View File

@ -293,7 +293,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {

View File

@ -142,7 +142,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
byte[] ref;
byte[] hap;
HashMap<Integer,Byte> expected;
GenotypingEngine ge = new GenotypingEngine(false, 0, false);
public BasicGenotypingTestProvider(String refString, String hapString, HashMap<Integer, Byte> expected) {
super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString));
@ -153,7 +152,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
public HashMap<Integer,VariantContext> calcAlignment() {
final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap);
return ge.generateVCsFromAlignment( alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name", 0);
return GenotypingEngine.generateVCsFromAlignment( new Haplotype(hap), alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name");
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.activeregion;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.HasGenomeLocation;
@ -58,9 +59,7 @@ public class ActiveRegion implements HasGenomeLocation {
}
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
return referenceReader.getSubsequenceAt( extendedLoc.getContig(),
Math.max(1, extendedLoc.getStart() - padding),
Math.min(referenceReader.getSequenceDictionary().getSequence(extendedLoc.getContig()).getSequenceLength(), extendedLoc.getStop() + padding) ).getBases();
return getReference( referenceReader, padding, extendedLoc );
}
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
@ -68,9 +67,15 @@ public class ActiveRegion implements HasGenomeLocation {
}
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
return referenceReader.getSubsequenceAt( fullExtentReferenceLoc.getContig(),
Math.max(1, fullExtentReferenceLoc.getStart() - padding),
Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases();
return getReference( referenceReader, padding, fullExtentReferenceLoc );
}
private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
final byte[] reference = referenceReader.getSubsequenceAt( genomeLoc.getContig(),
Math.max(1, genomeLoc.getStart() - padding),
Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases();
StringUtil.toUpperCase(reference);
return reference;
}
@Override