diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index c56cf5bf2..9de9b3292 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -42,14 +42,12 @@ import java.util.*; public class GenotypingEngine { private final boolean DEBUG; - private final int MNP_LOOK_AHEAD; private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE; private final static List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("", false); - public GenotypingEngine( final boolean DEBUG, final int MNP_LOOK_AHEAD, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) { + public GenotypingEngine( final boolean DEBUG, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) { this.DEBUG = DEBUG; - this.MNP_LOOK_AHEAD = MNP_LOOK_AHEAD; this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE; noCall.add(Allele.NO_CALL); } @@ -120,7 +118,7 @@ public class GenotypingEngine { System.out.println( "> Cigar = " + h.getCigar() ); } // Walk along the alignment and turn any difference from the reference into an event - h.setEventMap( generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) ); + h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) ); startPosKeySet.addAll(h.getEventMap().keySet()); } @@ -203,7 +201,7 @@ public class GenotypingEngine { if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); } for( final Haplotype h : haplotypes ) { // Walk along the alignment and turn any difference from the reference into an event - h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) ); + h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) ); if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); } if( DEBUG ) { System.out.println( h.toString() ); @@ -521,11 +519,7 @@ public class GenotypingEngine { return false; } - protected static HashMap generateVCsFromAlignment( final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) { - return generateVCsFromAlignment(null, alignmentStartHapwrtRef, cigar, ref, alignment, refLoc, sourceNameToAdd, MNP_LOOK_AHEAD); // BUGBUG: needed for compatibility with HaplotypeResolver code - } - - protected static HashMap generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) { + protected static HashMap generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) { final HashMap vcs = new HashMap(); int refPos = alignmentStartHapwrtRef; @@ -543,7 +537,7 @@ public class GenotypingEngine { if( BaseUtils.isRegularBase(refByte) ) { insertionAlleles.add( Allele.create(refByte, true) ); } - if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { + if( (haplotype.leftBreakPoint != 0 || haplotype.rightBreakPoint != 0) && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); } else { byte[] insertionBases = new byte[]{}; @@ -590,39 +584,16 @@ public class GenotypingEngine { case EQ: case X: { - int numSinceMismatch = -1; - int stopOfMismatch = -1; - int startOfMismatch = -1; - int refPosStartOfMismatch = -1; for( int iii = 0; iii < elementLength; iii++ ) { - if( ref[refPos] != alignment[alignmentPos] && alignment[alignmentPos] != ((byte) 'N') ) { - // SNP or start of possible MNP - if( stopOfMismatch == -1 ) { - startOfMismatch = alignmentPos; - stopOfMismatch = alignmentPos; - numSinceMismatch = 0; - refPosStartOfMismatch = refPos; - } else { - stopOfMismatch = alignmentPos; - } - } - if( stopOfMismatch != -1) { - numSinceMismatch++; - } - if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) { - final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 ); - final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 ); - if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) { + final byte refByte = ref[refPos]; + final byte altByte = alignment[alignmentPos]; + if( refByte != altByte ) { // SNP! + if( BaseUtils.isRegularBase(refByte) && BaseUtils.isRegularBase(altByte) ) { final ArrayList snpAlleles = new ArrayList(); - snpAlleles.add( Allele.create( refBases, true ) ); - snpAlleles.add( Allele.create( mismatchBases, false ) ); - final int snpStart = refLoc.getStart() + refPosStartOfMismatch; - vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make()); + snpAlleles.add( Allele.create( refByte, true ) ); + snpAlleles.add( Allele.create( altByte, false ) ); + vcs.put(refLoc.getStart() + refPos, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos, refLoc.getStart() + refPos, snpAlleles).make()); } - numSinceMismatch = -1; - stopOfMismatch = -1; - startOfMismatch = -1; - refPosStartOfMismatch = -1; } refPos++; alignmentPos++; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 39c7551f0..acb5c9ebe 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -119,10 +119,6 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false) protected String keepRG = null; - @Hidden - @Argument(fullName="mnpLookAhead", shortName="mnpLookAhead", doc = "The number of bases to combine together to form MNPs out of nearby consecutive SNPs on the same haplotype", required = false) - protected int MNP_LOOK_AHEAD = 0; - @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false) protected int MIN_PRUNE_FACTOR = 1; @@ -135,7 +131,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false; @Advanced - @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Gap continuation penalty for use in the Pair HMM", required = false) + @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false) protected int gcpHMM = 10; @Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false) @@ -189,21 +185,21 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @ArgumentCollection private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection(); - // the calculation arguments - private UnifiedGenotyperEngine UG_engine = null; - private UnifiedGenotyperEngine UG_engine_simple_genotyper = null; - @Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false) protected boolean DEBUG; + // the UG engines + private UnifiedGenotyperEngine UG_engine = null; + private UnifiedGenotyperEngine UG_engine_simple_genotyper = null; + // the assembly engine - LocalAssemblyEngine assemblyEngine = null; + private LocalAssemblyEngine assemblyEngine = null; // the likelihoods engine - LikelihoodCalculationEngine likelihoodCalculationEngine = null; + private LikelihoodCalculationEngine likelihoodCalculationEngine = null; // the genotyping engine - GenotypingEngine genotypingEngine = null; + private GenotypingEngine genotypingEngine = null; // the annotation engine private VariantAnnotatorEngine annotationEngine; @@ -289,7 +285,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter ); likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false ); - genotypingEngine = new GenotypingEngine( DEBUG, MNP_LOOK_AHEAD, OUTPUT_FULL_HAPLOTYPE_SEQUENCE ); + genotypingEngine = new GenotypingEngine( DEBUG, OUTPUT_FULL_HAPLOTYPE_SEQUENCE ); } //--------------------------------------------------------------------------------------------------------------- diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java index 38f11cc5d..8a401439b 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java @@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.SWPairwiseAlignment; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; @@ -337,8 +338,8 @@ public class HaplotypeResolver extends RodWalker { } // order results by start position - final TreeMap source1Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1, 0)); - final TreeMap source2Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2, 0)); + final TreeMap source1Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source1Haplotype), 0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1)); + final TreeMap source2Map = new TreeMap(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source2Haplotype), 0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2)); if ( source1Map.size() == 0 || source2Map.size() == 0 ) { // TODO -- handle errors appropriately logger.debug("No source alleles; aborting at " + refContext.getLocus()); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index 71aee44b8..a7a1630d4 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -293,7 +293,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() ); if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) { if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present - final HashMap eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place + final HashMap eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java index 539190fe9..07e7b0d92 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java @@ -142,7 +142,6 @@ public class GenotypingEngineUnitTest extends BaseTest { byte[] ref; byte[] hap; HashMap expected; - GenotypingEngine ge = new GenotypingEngine(false, 0, false); public BasicGenotypingTestProvider(String refString, String hapString, HashMap expected) { super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString)); @@ -153,7 +152,7 @@ public class GenotypingEngineUnitTest extends BaseTest { public HashMap calcAlignment() { final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap); - return ge.generateVCsFromAlignment( alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name", 0); + return GenotypingEngine.generateVCsFromAlignment( new Haplotype(hap), alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index fc9d56660..2ae1f2ca5 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -8,9 +8,10 @@ import java.util.Arrays; public class HaplotypeCallerIntegrationTest extends WalkerTest { final static String REF = b37KGReference; final String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam"; + final String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam"; final String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam"; + final String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam"; final String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals"; - //final String RECAL_FILE = validationDataLocation + "NA12878.kmer.8.subset.recal_data.bqsr"; private void HCTest(String bam, String args, String md5) { final String base = String.format("-T HaplotypeCaller -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3"; @@ -34,14 +35,35 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { } private void HCTestComplexVariants(String bam, String args, String md5) { - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 3"; + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 2"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec); } @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(CEUTRIO_BAM, "", "316a0fb9c66c0a6aa40a170d5d8c0021"); + HCTestComplexVariants(CEUTRIO_BAM, "", "3424b398a9f47c8ac606a5c56eb7d8a7"); + } + + private void HCTestSymbolicVariants(String bam, String args, String md5) { + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 2"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testHaplotypeCallerSymbolicVariants: args=" + args, spec); + } + + @Test + public void testHaplotypeCallerSingleSampleSymbolic() { + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "b71cfaea9390136c584c9671b149d573"); + } + + private void HCTestIndelQualityScores(String bam, String args, String md5) { + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10,005,000-10,025,000 --no_cmdline_in_header -o %s -minPruning 2"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testHaplotypeCallerIndelQualityScores: args=" + args, spec); + } + + @Test + public void testHaplotypeCallerSingleSampleIndelQualityScores() { + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "e1f88fac91424740c0eaac1de48b3970"); } } - diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java index 449e047cd..1fd220f2f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java @@ -1,11 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 95fadfd46..1ac8ee113 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -5,7 +5,6 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 4c0c0cabf..fc29a7f02 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -265,7 +265,7 @@ public class SelectVariants extends RodWalker implements TreeR @Argument(fullName="restrictAllelesTo", shortName="restrictAllelesTo", doc="Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC", required=false) private NumberAlleleRestriction alleleRestriction = NumberAlleleRestriction.ALL; - @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false) + @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Store the original AC, AF, and AN values in the INFO field after selecting (using keys AC_Orig, AF_Orig, and AN_Orig)", required=false) private boolean KEEP_ORIGINAL_CHR_COUNTS = false; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 6756c1c02..decc54d47 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.activeregion; import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.util.StringUtil; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; @@ -58,9 +59,7 @@ public class ActiveRegion implements HasGenomeLocation { } public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { - return referenceReader.getSubsequenceAt( extendedLoc.getContig(), - Math.max(1, extendedLoc.getStart() - padding), - Math.min(referenceReader.getSequenceDictionary().getSequence(extendedLoc.getContig()).getSequenceLength(), extendedLoc.getStop() + padding) ).getBases(); + return getReference( referenceReader, padding, extendedLoc ); } public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) { @@ -68,11 +67,16 @@ public class ActiveRegion implements HasGenomeLocation { } public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { - return referenceReader.getSubsequenceAt( fullExtentReferenceLoc.getContig(), - Math.max(1, fullExtentReferenceLoc.getStart() - padding), - Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases(); + return getReference( referenceReader, padding, fullExtentReferenceLoc ); } + private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) { + final byte[] reference = referenceReader.getSubsequenceAt( genomeLoc.getContig(), + Math.max(1, genomeLoc.getStart() - padding), + Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases(); + StringUtil.toUpperCase(reference); + return reference; + } @Override public GenomeLoc getLocation() { return activeRegionLoc; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java index d8ab4bd23..40ac089ef 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -94,6 +94,7 @@ public class VariantContextBuilder { this.start = start; this.stop = stop; this.alleles = alleles; + this.attributes = Collections.emptyMap(); // immutable toValidate.add(VariantContext.Validation.ALLELES); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 272166c68..19620b8df 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -750,6 +750,10 @@ public class VariantContextUnitTest extends BaseTest { modified = new VariantContextBuilder(modified).attributes(null).attribute("AC", 1).make(); Assert.assertEquals(modified.getAttribute("AC"), 1); + // test the behavior when the builder's attribute object is not initialized + modified = new VariantContextBuilder(modified.getSource(), modified.getChr(), modified.getStart(), modified.getEnd(), modified.getAlleles()).attribute("AC", 1).make(); + + // test normal attribute modification modified = new VariantContextBuilder(cfg.vc).attribute("AC", 1).make(); Assert.assertEquals(modified.getAttribute("AC"), 1); modified = new VariantContextBuilder(modified).attribute("AC", 2).make(); diff --git a/public/perl/sortByRef.pl b/public/perl/sortByRef.pl index 71d3f4477..e17707796 100755 --- a/public/perl/sortByRef.pl +++ b/public/perl/sortByRef.pl @@ -50,7 +50,7 @@ my %ref_order; my $n = 0; while ( ) { chomp; - my ($contig, $rest) = split "\t"; + my ($contig, $rest) = split '\s'; die("Dictionary file is probably corrupt: multiple instances of contig $contig") if ( defined $ref_order{$contig} ); $ref_order{$contig} = $n;