Intermediate commit refactoring FragmentPileup to (1) make it more accessible (now in utils.pileup) as well as (2) improve performance. Passes all integration tests now. Upcoming refactoring will change further how the system can be accessed, and further improve performance.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5522 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3bcd4c5d75
commit
6a1d12cf7b
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import net.sf.samtools.SAMUtils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.FragmentPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -256,55 +257,53 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
|||
public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
|
||||
int n = 0;
|
||||
|
||||
// TODO-- move this outside the UG, e.g. to ReadBackedPileup
|
||||
// combine paired reads into a single fragment
|
||||
HashMap<String, PerFragmentPileupElement> fragments = new HashMap<String, PerFragmentPileupElement>();
|
||||
for ( PileupElement p : pileup ) {
|
||||
Set<PileupElement> fragment = new HashSet<PileupElement>();
|
||||
String readName = p.getRead().getReadName();
|
||||
|
||||
if ( fragments.containsKey(readName) )
|
||||
fragment.addAll(fragments.get(readName).getPileupElements());
|
||||
|
||||
fragment.add(p);
|
||||
fragments.put(readName, new PerFragmentPileupElement(fragment));
|
||||
}
|
||||
|
||||
// for each fragment, add to the likelihoods
|
||||
for ( PerFragmentPileupElement fragment : fragments.values() ) {
|
||||
for ( FragmentPileup.PerFragmentPileupElement fragment : new FragmentPileup(pileup) ) {
|
||||
n += add(fragment, ignoreBadBases, capBaseQualsAtMappingQual);
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
public int add(PileupElement elt, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
|
||||
return add(new FragmentPileup.PerFragmentPileupElement(elt), ignoreBadBases, capBaseQualsAtMappingQual);
|
||||
}
|
||||
|
||||
public int add(PerFragmentPileupElement fragment, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
|
||||
private final static byte qualToUse(PileupElement p, boolean capBaseQualsAtMappingQual) {
|
||||
byte qual = p.getQual();
|
||||
|
||||
if ( qual > SAMUtils.MAX_PHRED_SCORE )
|
||||
throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
|
||||
if ( capBaseQualsAtMappingQual )
|
||||
qual = (byte)Math.min((int)p.getQual(), p.getMappingQual());
|
||||
|
||||
return qual;
|
||||
}
|
||||
|
||||
public int add(FragmentPileup.PerFragmentPileupElement fragment, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
|
||||
// TODO-- Right now we assume that there are at most 2 reads per fragment. This assumption is fine
|
||||
// TODO-- given the current state of next-gen sequencing, but may need to be fixed in the future.
|
||||
// TODO-- However, when that happens, we'll need to be a lot smarter about the caching we do here.
|
||||
byte observedBase1 = 0, observedBase2 = 0, qualityScore1 = 0, qualityScore2 = 0;
|
||||
for ( PileupElement p : fragment ) {
|
||||
if ( !usableBase(p, ignoreBadBases) )
|
||||
continue;
|
||||
|
||||
byte qual = p.getQual();
|
||||
if ( qual > SAMUtils.MAX_PHRED_SCORE )
|
||||
throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
|
||||
if ( capBaseQualsAtMappingQual )
|
||||
qual = (byte)Math.min((int)p.getQual(), p.getMappingQual());
|
||||
|
||||
if ( qualityScore1 == 0 ) {
|
||||
observedBase1 = p.getBase();
|
||||
qualityScore1 = qual;
|
||||
} else {
|
||||
observedBase2 = p.getBase();
|
||||
qualityScore2 = qual;
|
||||
}
|
||||
if ( usableBase(fragment.getFirst(), ignoreBadBases) ) {
|
||||
observedBase1 = fragment.getFirst().getBase();
|
||||
qualityScore1 = qualToUse(fragment.getFirst(), capBaseQualsAtMappingQual);
|
||||
}
|
||||
|
||||
// abort early if we didn't see any good bases
|
||||
if ( observedBase1 == 0 && observedBase2 == 0 )
|
||||
return 0;
|
||||
if ( fragment.hasSecond() && usableBase(fragment.getSecond(), ignoreBadBases) ) {
|
||||
observedBase2 = fragment.getSecond().getBase();
|
||||
qualityScore2 = qualToUse(fragment.getSecond(), capBaseQualsAtMappingQual);
|
||||
}
|
||||
|
||||
if ( observedBase1 == 0 ) {
|
||||
if ( observedBase2 == 0 ) // abort early if we didn't see any good bases
|
||||
return 0;
|
||||
else { // otherwise make 2 1
|
||||
observedBase1 = observedBase2;
|
||||
qualityScore1 = qualityScore2;
|
||||
observedBase2 = qualityScore2 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Just look up the cached result if it's available, or compute and store it
|
||||
DiploidSNPGenotypeLikelihoods gl;
|
||||
|
|
|
|||
|
|
@ -1,55 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: ebanks
|
||||
* Date: Jan 10, 2011
|
||||
*
|
||||
* Useful helper class to represent a full read pair at a position
|
||||
*/
|
||||
public class PerFragmentPileupElement implements Iterable<PileupElement> {
|
||||
protected Set<PileupElement> PEs = null;
|
||||
|
||||
public PerFragmentPileupElement(Set<PileupElement> PEs) {
|
||||
this.PEs = new HashSet<PileupElement>(PEs);
|
||||
}
|
||||
|
||||
public Set<PileupElement> getPileupElements() {
|
||||
return PEs;
|
||||
}
|
||||
|
||||
public Iterator<PileupElement> iterator() {
|
||||
return PEs.iterator();
|
||||
}
|
||||
}
|
||||
|
|
@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
|||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.utils.pileup.FragmentPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -120,11 +121,7 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
|
|||
for ( PileupElement p : context.getBasePileup() ) {
|
||||
byte base = p.getBase();
|
||||
if (!ReadsToDiscard.contains(p.getRead().getReadName()) && BaseUtils.simpleBaseToBaseIndex(base) != -1) {
|
||||
|
||||
// TODO-- move this outside, e.g. to ReadBackedPileup
|
||||
HashSet<PileupElement> fragment = new HashSet<PileupElement>();
|
||||
fragment.add(p);
|
||||
G.add(new PerFragmentPileupElement(fragment), true, false);
|
||||
G.add(p, true, false);
|
||||
//if (DEBUG){
|
||||
if (base == 'A'){numAs++;}
|
||||
else if (base == 'C'){numCs++;}
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
|||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.FragmentPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
|
|
@ -368,10 +369,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
}
|
||||
}else{
|
||||
//consider base in likelihood calculations if it looks good and has high mapping score
|
||||
// TODO-- move this outside, e.g. to ReadBackedPileup
|
||||
HashSet<PileupElement> fragment = new HashSet<PileupElement>();
|
||||
fragment.add(p);
|
||||
G.add(new PerFragmentPileupElement(fragment), true, false);
|
||||
G.add(p, true, false);
|
||||
if (!AllReadNames.contains(readname)){AllReadNames.add(readname); AllReads.add(p.getRead());}
|
||||
if (base == 'A'){numAs++; depth++;}
|
||||
else if (base == 'C'){numCs++; depth++;}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,97 @@
|
|||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* An easy to access fragment-based pileup. new FragmentPileup(RBP) creates one, and you
|
||||
* can either iterate over or get the collection of PerFragmentPileupElements.
|
||||
*
|
||||
* Based on the original code by E. Banks
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 3/26/11
|
||||
* Time: 10:09 PM
|
||||
*/
|
||||
public class FragmentPileup implements Iterable<FragmentPileup.PerFragmentPileupElement> {
|
||||
final Collection<PerFragmentPileupElement> fragments = new ArrayList<PerFragmentPileupElement>();
|
||||
|
||||
/**
|
||||
* Create a new Fragment-based pileup from the standard read-based pileup
|
||||
* @param pileup
|
||||
*/
|
||||
public FragmentPileup(ReadBackedPileup pileup) {
|
||||
Map<String, PileupElement> nameMap = new HashMap<String, PileupElement>();
|
||||
|
||||
// build an initial map, grabbing all of the multi-read fragments
|
||||
for ( PileupElement p : pileup ) {
|
||||
String readName = p.getRead().getReadName();
|
||||
|
||||
PileupElement pe1 = nameMap.get(readName);
|
||||
if ( pe1 != null ) {
|
||||
// assumes we have at most 2 reads per fragment
|
||||
fragments.add(new PerFragmentPileupElement(pe1, p));
|
||||
nameMap.remove(readName);
|
||||
} else {
|
||||
nameMap.put(readName, p);
|
||||
}
|
||||
}
|
||||
|
||||
// now go through the values in the nameMap to get the fragments with only a single read
|
||||
for ( PileupElement p : nameMap.values() )
|
||||
fragments.add(new PerFragmentPileupElement(p));
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the fragments, in no particular order
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Collection<PerFragmentPileupElement> getFragments() {
|
||||
return fragments;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an iterator over the fragments. No specific order of fragments is assumed
|
||||
* @return
|
||||
*/
|
||||
public Iterator<PerFragmentPileupElement> iterator() {
|
||||
return fragments.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Useful helper class to represent a full read pair at a position
|
||||
*
|
||||
* User: ebanks
|
||||
* Date: Jan 10, 2011
|
||||
*/
|
||||
public static class PerFragmentPileupElement {
|
||||
protected PileupElement PE1 = null, PE2 = null;
|
||||
|
||||
/**
|
||||
* Creates a fragment element that only contains a single read
|
||||
* @param PE
|
||||
*/
|
||||
public PerFragmentPileupElement(PileupElement PE) {
|
||||
PE1 = PE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a fragment element that contains both ends of a paired end read
|
||||
* @param PE1
|
||||
* @param PE2
|
||||
*/
|
||||
public PerFragmentPileupElement(PileupElement PE1, PileupElement PE2) {
|
||||
this.PE1 = PE1;
|
||||
this.PE2 = PE2;
|
||||
}
|
||||
|
||||
/** Returns the first pileup element -- never null */
|
||||
public PileupElement getFirst() { return PE1; }
|
||||
|
||||
/** Is there a second read in this fragment element? */
|
||||
public boolean hasSecond() { return PE2 != null; }
|
||||
|
||||
/** Returns the second read in this fragment element. May be null */
|
||||
public PileupElement getSecond() { return PE2; }
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue