From a13d125ba15345a17ad8f4de5e7183493b2d0ea2 Mon Sep 17 00:00:00 2001 From: Menachem Fromer Date: Thu, 7 Jun 2012 16:56:32 -0400 Subject: [PATCH 1/5] Split out contig names from Reference .fai file on white space (to support the GATK resource bundle's file human_g1k_v37.fasta.fai.gz, which does not use tab delimiters) --- public/perl/sortByRef.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/perl/sortByRef.pl b/public/perl/sortByRef.pl index 71d3f4477..e17707796 100755 --- a/public/perl/sortByRef.pl +++ b/public/perl/sortByRef.pl @@ -50,7 +50,7 @@ my %ref_order; my $n = 0; while ( ) { chomp; - my ($contig, $rest) = split "\t"; + my ($contig, $rest) = split '\s'; die("Dictionary file is probably corrupt: multiple instances of contig $contig") if ( defined $ref_order{$contig} ); $ref_order{$contig} = $n; From 94540ccc278ffe7374abcebc2e1227c859b3159f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 22 Aug 2012 12:54:29 -0400 Subject: [PATCH 2/5] Using the simple VCBuilder constructor and then subsequently trying to modify attributes was throwing a NPE. This is easily solved (without a performance hit) by initializing the attributes map to an immutable Collections.emptyMap(). Added unit test to cover this case. --- .../sting/utils/variantcontext/VariantContextBuilder.java | 1 + .../sting/utils/variantcontext/VariantContextUnitTest.java | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java index d8ab4bd23..40ac089ef 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -94,6 +94,7 @@ public class VariantContextBuilder { this.start = start; this.stop = stop; this.alleles = alleles; + this.attributes = Collections.emptyMap(); // immutable toValidate.add(VariantContext.Validation.ALLELES); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 272166c68..19620b8df 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -750,6 +750,10 @@ public class VariantContextUnitTest extends BaseTest { modified = new VariantContextBuilder(modified).attributes(null).attribute("AC", 1).make(); Assert.assertEquals(modified.getAttribute("AC"), 1); + // test the behavior when the builder's attribute object is not initialized + modified = new VariantContextBuilder(modified.getSource(), modified.getChr(), modified.getStart(), modified.getEnd(), modified.getAlleles()).attribute("AC", 1).make(); + + // test normal attribute modification modified = new VariantContextBuilder(cfg.vc).attribute("AC", 1).make(); Assert.assertEquals(modified.getAttribute("AC"), 1); modified = new VariantContextBuilder(modified).attribute("AC", 2).make(); From 944e1c299df52909d46deb9eae0138376e0b8001 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 22 Aug 2012 13:07:13 -0400 Subject: [PATCH 3/5] Docs for --keepOriginalAC were wrong in SelectVariants --- .../sting/gatk/walkers/variantutils/SelectVariants.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 4c0c0cabf..fc29a7f02 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -265,7 +265,7 @@ public class SelectVariants extends RodWalker implements TreeR @Argument(fullName="restrictAllelesTo", shortName="restrictAllelesTo", doc="Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC", required=false) private NumberAlleleRestriction alleleRestriction = NumberAlleleRestriction.ALL; - @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false) + @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Store the original AC, AF, and AN values in the INFO field after selecting (using keys AC_Orig, AF_Orig, and AN_Orig)", required=false) private boolean KEEP_ORIGINAL_CHR_COUNTS = false; /** From 63213e8eb5d45173c99493813cde4af1d5e46416 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 22 Aug 2012 14:18:44 -0400 Subject: [PATCH 4/5] Expanding the HaplotypeCaller integration tests to cover a wider range of data --- .../HaplotypeCallerIntegrationTest.java | 30 ++++++++++++++++--- .../annotator/ClippingRankSumTest.java | 3 -- .../walkers/annotator/ReadPosRankSumTest.java | 1 - .../utils/activeregion/ActiveRegion.java | 1 - 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index fc9d56660..2ae1f2ca5 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -8,9 +8,10 @@ import java.util.Arrays; public class HaplotypeCallerIntegrationTest extends WalkerTest { final static String REF = b37KGReference; final String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam"; + final String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam"; final String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam"; + final String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam"; final String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals"; - //final String RECAL_FILE = validationDataLocation + "NA12878.kmer.8.subset.recal_data.bqsr"; private void HCTest(String bam, String args, String md5) { final String base = String.format("-T HaplotypeCaller -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3"; @@ -34,14 +35,35 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { } private void HCTestComplexVariants(String bam, String args, String md5) { - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 3"; + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 2"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec); } @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(CEUTRIO_BAM, "", "316a0fb9c66c0a6aa40a170d5d8c0021"); + HCTestComplexVariants(CEUTRIO_BAM, "", "3424b398a9f47c8ac606a5c56eb7d8a7"); + } + + private void HCTestSymbolicVariants(String bam, String args, String md5) { + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 2"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testHaplotypeCallerSymbolicVariants: args=" + args, spec); + } + + @Test + public void testHaplotypeCallerSingleSampleSymbolic() { + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "b71cfaea9390136c584c9671b149d573"); + } + + private void HCTestIndelQualityScores(String bam, String args, String md5) { + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10,005,000-10,025,000 --no_cmdline_in_header -o %s -minPruning 2"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testHaplotypeCallerIndelQualityScores: args=" + args, spec); + } + + @Test + public void testHaplotypeCallerSingleSampleIndelQualityScores() { + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "e1f88fac91424740c0eaac1de48b3970"); } } - diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java index 449e047cd..1fd220f2f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java @@ -1,11 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 95fadfd46..1ac8ee113 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -5,7 +5,6 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 6756c1c02..1425800d8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -73,7 +73,6 @@ public class ActiveRegion implements HasGenomeLocation { Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases(); } - @Override public GenomeLoc getLocation() { return activeRegionLoc; } public GenomeLoc getExtendedLoc() { return extendedLoc; } From e5cfdb481198d4b3aac43bf6094f71a1396dbaec Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 22 Aug 2012 14:39:35 -0400 Subject: [PATCH 5/5] Bug fix for popular _Duplicate allele added to VariantContext_ error reported on the forum. It seems to be due to lower case bases in the reference being treated as reference mismatches. We would try to turn these mismatches into SNP events, for example c/C. We now uppercase the result from IndexedFastaSequenceFile.getSubsequenceAt() --- .../haplotypecaller/GenotypingEngine.java | 53 +++++-------------- .../haplotypecaller/HaplotypeCaller.java | 22 ++++---- .../haplotypecaller/HaplotypeResolver.java | 5 +- .../SimpleDeBruijnAssembler.java | 2 +- .../GenotypingEngineUnitTest.java | 3 +- .../utils/activeregion/ActiveRegion.java | 17 +++--- 6 files changed, 37 insertions(+), 65 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index c56cf5bf2..9de9b3292 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -42,14 +42,12 @@ import java.util.*; public class GenotypingEngine { private final boolean DEBUG; - private final int MNP_LOOK_AHEAD; private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE; private final static List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("", false); - public GenotypingEngine( final boolean DEBUG, final int MNP_LOOK_AHEAD, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) { + public GenotypingEngine( final boolean DEBUG, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) { this.DEBUG = DEBUG; - this.MNP_LOOK_AHEAD = MNP_LOOK_AHEAD; this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE; noCall.add(Allele.NO_CALL); } @@ -120,7 +118,7 @@ public class GenotypingEngine { System.out.println( "> Cigar = " + h.getCigar() ); } // Walk along the alignment and turn any difference from the reference into an event - h.setEventMap( generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) ); + h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) ); startPosKeySet.addAll(h.getEventMap().keySet()); } @@ -203,7 +201,7 @@ public class GenotypingEngine { if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); } for( final Haplotype h : haplotypes ) { // Walk along the alignment and turn any difference from the reference into an event - h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) ); + h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) ); if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); } if( DEBUG ) { System.out.println( h.toString() ); @@ -521,11 +519,7 @@ public class GenotypingEngine { return false; } - protected static HashMap generateVCsFromAlignment( final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) { - return generateVCsFromAlignment(null, alignmentStartHapwrtRef, cigar, ref, alignment, refLoc, sourceNameToAdd, MNP_LOOK_AHEAD); // BUGBUG: needed for compatibility with HaplotypeResolver code - } - - protected static HashMap generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) { + protected static HashMap generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) { final HashMap vcs = new HashMap(); int refPos = alignmentStartHapwrtRef; @@ -543,7 +537,7 @@ public class GenotypingEngine { if( BaseUtils.isRegularBase(refByte) ) { insertionAlleles.add( Allele.create(refByte, true) ); } - if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { + if( (haplotype.leftBreakPoint != 0 || haplotype.rightBreakPoint != 0) && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); } else { byte[] insertionBases = new byte[]{}; @@ -590,39 +584,16 @@ public class GenotypingEngine { case EQ: case X: { - int numSinceMismatch = -1; - int stopOfMismatch = -1; - int startOfMismatch = -1; - int refPosStartOfMismatch = -1; for( int iii = 0; iii < elementLength; iii++ ) { - if( ref[refPos] != alignment[alignmentPos] && alignment[alignmentPos] != ((byte) 'N') ) { - // SNP or start of possible MNP - if( stopOfMismatch == -1 ) { - startOfMismatch = alignmentPos; - stopOfMismatch = alignmentPos; - numSinceMismatch = 0; - refPosStartOfMismatch = refPos; - } else { - stopOfMismatch = alignmentPos; - } - } - if( stopOfMismatch != -1) { - numSinceMismatch++; - } - if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) { - final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 ); - final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 ); - if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) { + final byte refByte = ref[refPos]; + final byte altByte = alignment[alignmentPos]; + if( refByte != altByte ) { // SNP! + if( BaseUtils.isRegularBase(refByte) && BaseUtils.isRegularBase(altByte) ) { final ArrayList snpAlleles = new ArrayList(); - snpAlleles.add( Allele.create( refBases, true ) ); - snpAlleles.add( Allele.create( mismatchBases, false ) ); - final int snpStart = refLoc.getStart() + refPosStartOfMismatch; - vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make()); + snpAlleles.add( Allele.create( refByte, true ) ); + snpAlleles.add( Allele.create( altByte, false ) ); + vcs.put(refLoc.getStart() + refPos, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos, refLoc.getStart() + refPos, snpAlleles).make()); } - numSinceMismatch = -1; - stopOfMismatch = -1; - startOfMismatch = -1; - refPosStartOfMismatch = -1; } refPos++; alignmentPos++; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 4f434bba6..8079c2e1f 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -122,10 +122,6 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false) protected String keepRG = null; - @Hidden - @Argument(fullName="mnpLookAhead", shortName="mnpLookAhead", doc = "The number of bases to combine together to form MNPs out of nearby consecutive SNPs on the same haplotype", required = false) - protected int MNP_LOOK_AHEAD = 0; - @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false) protected int MIN_PRUNE_FACTOR = 1; @@ -138,7 +134,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false; @Advanced - @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Gap continuation penalty for use in the Pair HMM", required = false) + @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false) protected int gcpHMM = 10; @Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false) @@ -192,21 +188,21 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @ArgumentCollection private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection(); - // the calculation arguments - private UnifiedGenotyperEngine UG_engine = null; - private UnifiedGenotyperEngine UG_engine_simple_genotyper = null; - @Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false) protected boolean DEBUG; + // the UG engines + private UnifiedGenotyperEngine UG_engine = null; + private UnifiedGenotyperEngine UG_engine_simple_genotyper = null; + // the assembly engine - LocalAssemblyEngine assemblyEngine = null; + private LocalAssemblyEngine assemblyEngine = null; // the likelihoods engine - LikelihoodCalculationEngine likelihoodCalculationEngine = null; + private LikelihoodCalculationEngine likelihoodCalculationEngine = null; // the genotyping engine - GenotypingEngine genotypingEngine = null; + private GenotypingEngine genotypingEngine = null; // the annotation engine private VariantAnnotatorEngine annotationEngine; @@ -292,7 +288,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter ); likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false ); - genotypingEngine = new GenotypingEngine( DEBUG, MNP_LOOK_AHEAD, OUTPUT_FULL_HAPLOTYPE_SEQUENCE ); + genotypingEngine = new GenotypingEngine( DEBUG, OUTPUT_FULL_HAPLOTYPE_SEQUENCE ); } //--------------------------------------------------------------------------------------------------------------- diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java index 38f11cc5d..8a401439b 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java @@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.SWPairwiseAlignment; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; @@ -337,8 +338,8 @@ public class HaplotypeResolver extends RodWalker { } // order results by start position - final TreeMap source1Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1, 0)); - final TreeMap source2Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2, 0)); + final TreeMap source1Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source1Haplotype), 0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1)); + final TreeMap source2Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source2Haplotype), 0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2)); if ( source1Map.size() == 0 || source2Map.size() == 0 ) { // TODO -- handle errors appropriately logger.debug("No source alleles; aborting at " + refContext.getLocus()); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index 56cb6c3d4..93fd36a22 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -292,7 +292,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() ); if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) { if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present - final HashMap eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place + final HashMap eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java index 539190fe9..07e7b0d92 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java @@ -142,7 +142,6 @@ public class GenotypingEngineUnitTest extends BaseTest { byte[] ref; byte[] hap; HashMap expected; - GenotypingEngine ge = new GenotypingEngine(false, 0, false); public BasicGenotypingTestProvider(String refString, String hapString, HashMap expected) { super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString)); @@ -153,7 +152,7 @@ public class GenotypingEngineUnitTest extends BaseTest { public HashMap calcAlignment() { final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap); - return ge.generateVCsFromAlignment( alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name", 0); + return GenotypingEngine.generateVCsFromAlignment( new Haplotype(hap), alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name"); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 8e660350f..dab8ddc78 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.activeregion; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.util.StringUtil; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; @@ -58,9 +59,7 @@ public class ActiveRegion implements HasGenomeLocation, Comparable } public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { - return referenceReader.getSubsequenceAt( extendedLoc.getContig(), - Math.max(1, extendedLoc.getStart() - padding), - Math.min(referenceReader.getSequenceDictionary().getSequence(extendedLoc.getContig()).getSequenceLength(), extendedLoc.getStop() + padding) ).getBases(); + return getReference( referenceReader, padding, extendedLoc ); } public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) { @@ -68,9 +67,15 @@ public class ActiveRegion implements HasGenomeLocation, Comparable } public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { - return referenceReader.getSubsequenceAt( fullExtentReferenceLoc.getContig(), - Math.max(1, fullExtentReferenceLoc.getStart() - padding), - Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases(); + return getReference( referenceReader, padding, fullExtentReferenceLoc ); + } + + private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) { + final byte[] reference = referenceReader.getSubsequenceAt( genomeLoc.getContig(), + Math.max(1, genomeLoc.getStart() - padding), + Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases(); + StringUtil.toUpperCase(reference); + return reference; } @Override