diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index c56cf5bf2..9de9b3292 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -42,14 +42,12 @@ import java.util.*; public class GenotypingEngine { private final boolean DEBUG; - private final int MNP_LOOK_AHEAD; private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE; private final static List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("", false); - public GenotypingEngine( final boolean DEBUG, final int MNP_LOOK_AHEAD, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) { + public GenotypingEngine( final boolean DEBUG, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) { this.DEBUG = DEBUG; - this.MNP_LOOK_AHEAD = MNP_LOOK_AHEAD; this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE; noCall.add(Allele.NO_CALL); } @@ -120,7 +118,7 @@ public class GenotypingEngine { System.out.println( "> Cigar = " + h.getCigar() ); } // Walk along the alignment and turn any difference from the reference into an event - h.setEventMap( generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) ); + h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) ); startPosKeySet.addAll(h.getEventMap().keySet()); } @@ -203,7 +201,7 @@ public class GenotypingEngine { if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); } for( final Haplotype h : haplotypes ) { // Walk along the alignment and turn any difference from the reference into an event - h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) ); + h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) ); if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); } if( DEBUG ) { System.out.println( h.toString() ); @@ -521,11 +519,7 @@ public class GenotypingEngine { return false; } - protected static HashMap generateVCsFromAlignment( final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) { - return generateVCsFromAlignment(null, alignmentStartHapwrtRef, cigar, ref, alignment, refLoc, sourceNameToAdd, MNP_LOOK_AHEAD); // BUGBUG: needed for compatibility with HaplotypeResolver code - } - - protected static HashMap generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) { + protected static HashMap generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) { final HashMap vcs = new HashMap(); int refPos = alignmentStartHapwrtRef; @@ -543,7 +537,7 @@ public class GenotypingEngine { if( BaseUtils.isRegularBase(refByte) ) { insertionAlleles.add( Allele.create(refByte, true) ); } - if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { + if( (haplotype.leftBreakPoint != 0 || haplotype.rightBreakPoint != 0) && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); } else { byte[] insertionBases = new byte[]{}; @@ -590,39 +584,16 @@ public class GenotypingEngine { case EQ: case X: { - int numSinceMismatch = -1; - int stopOfMismatch = -1; - int startOfMismatch = -1; - int refPosStartOfMismatch = -1; for( int iii = 0; iii < elementLength; iii++ ) { - if( ref[refPos] != alignment[alignmentPos] && alignment[alignmentPos] != ((byte) 'N') ) { - // SNP or start of possible MNP - if( stopOfMismatch == -1 ) { - startOfMismatch = alignmentPos; - stopOfMismatch = alignmentPos; - numSinceMismatch = 0; - refPosStartOfMismatch = refPos; - } else { - stopOfMismatch = alignmentPos; - } - } - if( stopOfMismatch != -1) { - numSinceMismatch++; - } - if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) { - final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 ); - final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 ); - if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) { + final byte refByte = ref[refPos]; + final byte altByte = alignment[alignmentPos]; + if( refByte != altByte ) { // SNP! + if( BaseUtils.isRegularBase(refByte) && BaseUtils.isRegularBase(altByte) ) { final ArrayList snpAlleles = new ArrayList(); - snpAlleles.add( Allele.create( refBases, true ) ); - snpAlleles.add( Allele.create( mismatchBases, false ) ); - final int snpStart = refLoc.getStart() + refPosStartOfMismatch; - vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make()); + snpAlleles.add( Allele.create( refByte, true ) ); + snpAlleles.add( Allele.create( altByte, false ) ); + vcs.put(refLoc.getStart() + refPos, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos, refLoc.getStart() + refPos, snpAlleles).make()); } - numSinceMismatch = -1; - stopOfMismatch = -1; - startOfMismatch = -1; - refPosStartOfMismatch = -1; } refPos++; alignmentPos++; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 4f434bba6..8079c2e1f 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -122,10 +122,6 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false) protected String keepRG = null; - @Hidden - @Argument(fullName="mnpLookAhead", shortName="mnpLookAhead", doc = "The number of bases to combine together to form MNPs out of nearby consecutive SNPs on the same haplotype", required = false) - protected int MNP_LOOK_AHEAD = 0; - @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false) protected int MIN_PRUNE_FACTOR = 1; @@ -138,7 +134,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false; @Advanced - @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Gap continuation penalty for use in the Pair HMM", required = false) + @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false) protected int gcpHMM = 10; @Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false) @@ -192,21 +188,21 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @ArgumentCollection private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection(); - // the calculation arguments - private UnifiedGenotyperEngine UG_engine = null; - private UnifiedGenotyperEngine UG_engine_simple_genotyper = null; - @Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false) protected boolean DEBUG; + // the UG engines + private UnifiedGenotyperEngine UG_engine = null; + private UnifiedGenotyperEngine UG_engine_simple_genotyper = null; + // the assembly engine - LocalAssemblyEngine assemblyEngine = null; + private LocalAssemblyEngine assemblyEngine = null; // the likelihoods engine - LikelihoodCalculationEngine likelihoodCalculationEngine = null; + private LikelihoodCalculationEngine likelihoodCalculationEngine = null; // the genotyping engine - GenotypingEngine genotypingEngine = null; + private GenotypingEngine genotypingEngine = null; // the annotation engine private VariantAnnotatorEngine annotationEngine; @@ -292,7 +288,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter ); likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false ); - genotypingEngine = new GenotypingEngine( DEBUG, MNP_LOOK_AHEAD, OUTPUT_FULL_HAPLOTYPE_SEQUENCE ); + genotypingEngine = new GenotypingEngine( DEBUG, OUTPUT_FULL_HAPLOTYPE_SEQUENCE ); } //--------------------------------------------------------------------------------------------------------------- diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java index 38f11cc5d..8a401439b 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java @@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.SWPairwiseAlignment; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; @@ -337,8 +338,8 @@ public class HaplotypeResolver extends RodWalker { } // order results by start position - final TreeMap source1Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1, 0)); - final TreeMap source2Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2, 0)); + final TreeMap source1Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source1Haplotype), 0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1)); + final TreeMap source2Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source2Haplotype), 0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2)); if ( source1Map.size() == 0 || source2Map.size() == 0 ) { // TODO -- handle errors appropriately logger.debug("No source alleles; aborting at " + refContext.getLocus()); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index 56cb6c3d4..93fd36a22 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -292,7 +292,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() ); if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) { if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present - final HashMap eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place + final HashMap eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java index 539190fe9..07e7b0d92 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java @@ -142,7 +142,6 @@ public class GenotypingEngineUnitTest extends BaseTest { byte[] ref; byte[] hap; HashMap expected; - GenotypingEngine ge = new GenotypingEngine(false, 0, false); public BasicGenotypingTestProvider(String refString, String hapString, HashMap expected) { super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString)); @@ -153,7 +152,7 @@ public class GenotypingEngineUnitTest extends BaseTest { public HashMap calcAlignment() { final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap); - return ge.generateVCsFromAlignment( alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name", 0); + return GenotypingEngine.generateVCsFromAlignment( new Haplotype(hap), alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name"); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 8e660350f..dab8ddc78 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.activeregion; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.util.StringUtil; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; @@ -58,9 +59,7 @@ public class ActiveRegion implements HasGenomeLocation, Comparable } public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { - return referenceReader.getSubsequenceAt( extendedLoc.getContig(), - Math.max(1, extendedLoc.getStart() - padding), - Math.min(referenceReader.getSequenceDictionary().getSequence(extendedLoc.getContig()).getSequenceLength(), extendedLoc.getStop() + padding) ).getBases(); + return getReference( referenceReader, padding, extendedLoc ); } public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) { @@ -68,9 +67,15 @@ public class ActiveRegion implements HasGenomeLocation, Comparable } public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { - return referenceReader.getSubsequenceAt( fullExtentReferenceLoc.getContig(), - Math.max(1, fullExtentReferenceLoc.getStart() - padding), - Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases(); + return getReference( referenceReader, padding, fullExtentReferenceLoc ); + } + + private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) { + final byte[] reference = referenceReader.getSubsequenceAt( genomeLoc.getContig(), + Math.max(1, genomeLoc.getStart() - padding), + Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases(); + StringUtil.toUpperCase(reference); + return reference; } @Override