diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index cc8424b88..8177dc579 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.pileup.FragmentPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -256,55 +257,53 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) { int n = 0; - // TODO-- move this outside the UG, e.g. to ReadBackedPileup - // combine paired reads into a single fragment - HashMap fragments = new HashMap(); - for ( PileupElement p : pileup ) { - Set fragment = new HashSet(); - String readName = p.getRead().getReadName(); - - if ( fragments.containsKey(readName) ) - fragment.addAll(fragments.get(readName).getPileupElements()); - - fragment.add(p); - fragments.put(readName, new PerFragmentPileupElement(fragment)); - } - // for each fragment, add to the likelihoods - for ( PerFragmentPileupElement fragment : fragments.values() ) { + for ( FragmentPileup.PerFragmentPileupElement fragment : new FragmentPileup(pileup) ) { n += add(fragment, ignoreBadBases, capBaseQualsAtMappingQual); } return n; } + public int add(PileupElement elt, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) { + return add(new FragmentPileup.PerFragmentPileupElement(elt), ignoreBadBases, capBaseQualsAtMappingQual); + } - public int add(PerFragmentPileupElement fragment, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) { + private final static byte qualToUse(PileupElement p, boolean capBaseQualsAtMappingQual) { + byte qual = p.getQual(); + + if ( qual > SAMUtils.MAX_PHRED_SCORE ) + throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName())); + if ( capBaseQualsAtMappingQual ) + qual = (byte)Math.min((int)p.getQual(), p.getMappingQual()); + + return qual; + } + + public int add(FragmentPileup.PerFragmentPileupElement fragment, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) { // TODO-- Right now we assume that there are at most 2 reads per fragment. This assumption is fine // TODO-- given the current state of next-gen sequencing, but may need to be fixed in the future. // TODO-- However, when that happens, we'll need to be a lot smarter about the caching we do here. byte observedBase1 = 0, observedBase2 = 0, qualityScore1 = 0, qualityScore2 = 0; - for ( PileupElement p : fragment ) { - if ( !usableBase(p, ignoreBadBases) ) - continue; - byte qual = p.getQual(); - if ( qual > SAMUtils.MAX_PHRED_SCORE ) - throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName())); - if ( capBaseQualsAtMappingQual ) - qual = (byte)Math.min((int)p.getQual(), p.getMappingQual()); - - if ( qualityScore1 == 0 ) { - observedBase1 = p.getBase(); - qualityScore1 = qual; - } else { - observedBase2 = p.getBase(); - qualityScore2 = qual; - } + if ( usableBase(fragment.getFirst(), ignoreBadBases) ) { + observedBase1 = fragment.getFirst().getBase(); + qualityScore1 = qualToUse(fragment.getFirst(), capBaseQualsAtMappingQual); } - // abort early if we didn't see any good bases - if ( observedBase1 == 0 && observedBase2 == 0 ) - return 0; + if ( fragment.hasSecond() && usableBase(fragment.getSecond(), ignoreBadBases) ) { + observedBase2 = fragment.getSecond().getBase(); + qualityScore2 = qualToUse(fragment.getSecond(), capBaseQualsAtMappingQual); + } + + if ( observedBase1 == 0 ) { + if ( observedBase2 == 0 ) // abort early if we didn't see any good bases + return 0; + else { // otherwise make 2 1 + observedBase1 = observedBase2; + qualityScore1 = qualityScore2; + observedBase2 = qualityScore2 = 0; + } + } // Just look up the cached result if it's available, or compute and store it DiploidSNPGenotypeLikelihoods gl; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerFragmentPileupElement.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerFragmentPileupElement.java deleted file mode 100755 index c15ba2016..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerFragmentPileupElement.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import org.broadinstitute.sting.utils.pileup.PileupElement; - -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; - -/** - * Created by IntelliJ IDEA. - * User: ebanks - * Date: Jan 10, 2011 - * - * Useful helper class to represent a full read pair at a position - */ -public class PerFragmentPileupElement implements Iterable { - protected Set PEs = null; - - public PerFragmentPileupElement(Set PEs) { - this.PEs = new HashSet(PEs); - } - - public Set getPileupElements() { - return PEs; - } - - public Iterator iterator() { - return PEs.iterator(); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java index 61fd37f04..11247a8c0 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateBaseLikelihoodsWalker.java @@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.pileup.FragmentPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; import java.util.ArrayList; @@ -120,11 +121,7 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker fragment = new HashSet(); - fragment.add(p); - G.add(new PerFragmentPileupElement(fragment), true, false); + G.add(p, true, false); //if (DEBUG){ if (base == 'A'){numAs++;} else if (base == 'C'){numCs++;} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java index 43bc42e38..70e37a783 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.FragmentPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; @@ -368,10 +369,7 @@ public class CallHLAWalker extends LocusWalker>{ } }else{ //consider base in likelihood calculations if it looks good and has high mapping score - // TODO-- move this outside, e.g. to ReadBackedPileup - HashSet fragment = new HashSet(); - fragment.add(p); - G.add(new PerFragmentPileupElement(fragment), true, false); + G.add(p, true, false); if (!AllReadNames.contains(readname)){AllReadNames.add(readname); AllReads.add(p.getRead());} if (base == 'A'){numAs++; depth++;} else if (base == 'C'){numCs++; depth++;} diff --git a/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java new file mode 100644 index 000000000..50abf1f90 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java @@ -0,0 +1,97 @@ +package org.broadinstitute.sting.utils.pileup; + +import java.util.*; + +/** + * An easy to access fragment-based pileup. new FragmentPileup(RBP) creates one, and you + * can either iterate over or get the collection of PerFragmentPileupElements. + * + * Based on the original code by E. Banks + * + * User: depristo + * Date: 3/26/11 + * Time: 10:09 PM + */ +public class FragmentPileup implements Iterable { + final Collection fragments = new ArrayList(); + + /** + * Create a new Fragment-based pileup from the standard read-based pileup + * @param pileup + */ + public FragmentPileup(ReadBackedPileup pileup) { + Map nameMap = new HashMap(); + + // build an initial map, grabbing all of the multi-read fragments + for ( PileupElement p : pileup ) { + String readName = p.getRead().getReadName(); + + PileupElement pe1 = nameMap.get(readName); + if ( pe1 != null ) { + // assumes we have at most 2 reads per fragment + fragments.add(new PerFragmentPileupElement(pe1, p)); + nameMap.remove(readName); + } else { + nameMap.put(readName, p); + } + } + + // now go through the values in the nameMap to get the fragments with only a single read + for ( PileupElement p : nameMap.values() ) + fragments.add(new PerFragmentPileupElement(p)); + } + + /** + * Gets the fragments, in no particular order + * + * @return + */ + public Collection getFragments() { + return fragments; + } + + /** + * Returns an iterator over the fragments. No specific order of fragments is assumed + * @return + */ + public Iterator iterator() { + return fragments.iterator(); + } + + /** + * Useful helper class to represent a full read pair at a position + * + * User: ebanks + * Date: Jan 10, 2011 + */ + public static class PerFragmentPileupElement { + protected PileupElement PE1 = null, PE2 = null; + + /** + * Creates a fragment element that only contains a single read + * @param PE + */ + public PerFragmentPileupElement(PileupElement PE) { + PE1 = PE; + } + + /** + * Creates a fragment element that contains both ends of a paired end read + * @param PE1 + * @param PE2 + */ + public PerFragmentPileupElement(PileupElement PE1, PileupElement PE2) { + this.PE1 = PE1; + this.PE2 = PE2; + } + + /** Returns the first pileup element -- never null */ + public PileupElement getFirst() { return PE1; } + + /** Is there a second read in this fragment element? */ + public boolean hasSecond() { return PE2 != null; } + + /** Returns the second read in this fragment element. May be null */ + public PileupElement getSecond() { return PE2; } + } +}