2009-12-11 03:21:16 +08:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2009 The Broad Institute
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
|
|
* obtaining a copy of this software and associated documentation
|
|
|
|
|
* files (the "Software"), to deal in the Software without
|
|
|
|
|
* restriction, including without limitation the rights to use,
|
|
|
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
* copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following
|
|
|
|
|
* conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
|
* included in all copies or substantial portions of the Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
package org.broadinstitute.sting.gatk.contexts;
|
|
|
|
|
|
|
|
|
|
import net.sf.samtools.SAMRecord;
|
|
|
|
|
import net.sf.samtools.SAMReadGroupRecord;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
|
|
|
import org.broadinstitute.sting.utils.StingException;
|
2010-04-21 01:38:09 +08:00
|
|
|
import org.broadinstitute.sting.utils.pileup.*;
|
2009-12-11 03:21:16 +08:00
|
|
|
|
2010-04-21 01:38:09 +08:00
|
|
|
import java.util.*;
|
2009-12-11 03:21:16 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Useful class for storing different AlignmentContexts
|
|
|
|
|
* User: ebanks
|
2010-02-27 02:18:38 +08:00
|
|
|
* Modified: chartl (split by read group)
|
2009-12-11 03:21:16 +08:00
|
|
|
*/
|
|
|
|
|
public class StratifiedAlignmentContext {
|
|
|
|
|
|
2009-12-11 03:50:06 +08:00
|
|
|
// Definitions:
|
|
|
|
|
// COMPLETE = full alignment context
|
2010-01-08 13:40:42 +08:00
|
|
|
// FORWARD = reads on forward strand
|
2009-12-17 01:28:09 +08:00
|
|
|
// REVERSE = reads on reverse strand
|
2009-12-11 03:50:06 +08:00
|
|
|
//
|
2009-12-17 01:28:09 +08:00
|
|
|
/**
 * Stratifications kept by a StratifiedAlignmentContext.
 * The declaration order matters: ordinals are used as array indices by the enclosing class.
 */
public enum StratifiedContextType {
    /** All pileup elements, regardless of strand. */
    COMPLETE,
    /** Pileup elements whose read is not flagged as negative strand. */
    FORWARD,
    /** Pileup elements whose read is flagged as negative strand. */
    REVERSE
}
|
2009-12-11 03:21:16 +08:00
|
|
|
|
|
|
|
|
private GenomeLoc loc;
|
2009-12-11 03:50:06 +08:00
|
|
|
private AlignmentContext[] contexts = new AlignmentContext[StratifiedContextType.values().length];
|
2010-04-21 01:38:09 +08:00
|
|
|
private boolean isExtended = false; // tells whether this alignment context is an extended event context
|
2010-03-29 05:45:22 +08:00
|
|
|
|
|
|
|
|
// todo -- why are you storing reads separately each time? There's a ReadBackedPileup object that's supposed to handle this
|
2010-04-21 01:38:09 +08:00
|
|
|
// private ArrayList<SAMRecord>[] reads = new ArrayList[StratifiedContextType.values().length];
|
|
|
|
|
// private ArrayList<Integer>[] offsets = new ArrayList[StratifiedContextType.values().length];
|
2009-12-11 03:21:16 +08:00
|
|
|
|
2010-04-21 01:38:09 +08:00
|
|
|
private ArrayList<PileupElement>[] pileupElems = new ArrayList[StratifiedContextType.values().length];
|
2010-03-29 05:45:22 +08:00
|
|
|
//
|
|
|
|
|
// accessors
|
|
|
|
|
//
|
|
|
|
|
/**
 * @return the genomic location this context was constructed with
 */
public GenomeLoc getLocation() {
    return loc;
}
|
2010-04-21 01:38:09 +08:00
|
|
|
// public ArrayList<SAMRecord> getReads(StratifiedContextType type) { return reads[type.ordinal()]; }
|
|
|
|
|
// public ArrayList<Integer> getOffsets(StratifiedContextType type) { return offsets[type.ordinal()]; }
|
|
|
|
|
|
|
|
|
|
public ArrayList<PileupElement> getPileupElements(StratifiedContextType type) {
|
|
|
|
|
return pileupElems[type.ordinal()];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// public ArrayList<ExtendedEventPileupElement> getExtendedPileupElements(StratifiedContextType type) {
|
|
|
|
|
// if ( ! isExtended ) throw new StingException("Extended read backed pileups requested from StratifiedAlignmentContext that holds simple pileups");
|
|
|
|
|
//
|
|
|
|
|
// return (ArrayList<ExtendedEventPileupElement>)(pileupElems[type.ordinal()]);
|
|
|
|
|
// }
|
2009-12-11 03:21:16 +08:00
|
|
|
|
|
|
|
|
/**
 * Creates an empty context for simple (non-extended) pileup elements.
 *
 * @param loc the genomic location of this context
 */
public StratifiedAlignmentContext(GenomeLoc loc) {
    // a context is non-extended unless explicitly requested otherwise
    this(loc, false);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Creates an empty context at the given location.
 *
 * @param loc        the genomic location of this context
 * @param isExtended true if this context will hold extended event pileup elements,
 *                   false if it will hold simple pileup elements
 */
public StratifiedAlignmentContext(GenomeLoc loc, boolean isExtended) {
    this.loc = loc;
    this.isExtended = isExtended;

    // Simple and extended contexts both keep their elements in lists of the common
    // PileupElement type, so every slot is initialized the same way.
    for ( int i = 0; i < pileupElems.length; i++ ) {
        pileupElems[i] = new ArrayList<PileupElement>();
    }
}
|
|
|
|
|
|
2010-03-29 05:45:22 +08:00
|
|
|
public AlignmentContext getContext(StratifiedContextType type) {
|
|
|
|
|
int index = type.ordinal();
|
2010-04-21 01:38:09 +08:00
|
|
|
if ( contexts[index] == null ) {
|
|
|
|
|
if ( isExtended ) {
|
2010-06-13 09:47:02 +08:00
|
|
|
contexts[index] = new AlignmentContext(loc , new UnifiedReadBackedExtendedEventPileup(loc, (ArrayList<ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)getPileupElements(type))));
|
2010-04-21 01:38:09 +08:00
|
|
|
} else {
|
2010-06-13 09:47:02 +08:00
|
|
|
contexts[index] = new AlignmentContext(loc, new UnifiedReadBackedPileup(loc, getPileupElements(type)));
|
2010-04-21 01:38:09 +08:00
|
|
|
}
|
|
|
|
|
}
|
2009-12-11 03:50:06 +08:00
|
|
|
return contexts[index];
|
2009-12-11 03:21:16 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void add(SAMRecord read, int offset) {
|
2010-04-21 01:38:09 +08:00
|
|
|
if ( isExtended ) throw new StingException("Can not add read/offset without event type specified to the context holding extended events");
|
|
|
|
|
if ( read.getReadNegativeStrandFlag() ) {
|
|
|
|
|
pileupElems[StratifiedContextType.REVERSE.ordinal()].add(new PileupElement(read,offset));
|
|
|
|
|
} else {
|
|
|
|
|
pileupElems[StratifiedContextType.FORWARD.ordinal()].add(new PileupElement(read,offset));
|
|
|
|
|
}
|
|
|
|
|
pileupElems[StratifiedContextType.COMPLETE.ordinal()].add(new PileupElement(read,offset));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void add(PileupElement p) {
|
|
|
|
|
// if ( isExtended ) throw new StingException("Can not add simple pileup element to the context holding extended events");
|
|
|
|
|
SAMRecord read = p.getRead();
|
|
|
|
|
if ( read.getReadNegativeStrandFlag() ) {
|
|
|
|
|
pileupElems[StratifiedContextType.REVERSE.ordinal()].add(p);
|
|
|
|
|
} else {
|
|
|
|
|
pileupElems[StratifiedContextType.FORWARD.ordinal()].add(p);
|
|
|
|
|
}
|
|
|
|
|
pileupElems[StratifiedContextType.COMPLETE.ordinal()].add(p);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void add(SAMRecord read, int offset, int length, byte [] bases) {
|
|
|
|
|
if ( ! isExtended ) throw new StingException("Can not add read/offset with event type specified to the context holding simple events");
|
2009-12-17 01:28:09 +08:00
|
|
|
if ( read.getReadNegativeStrandFlag() ) {
|
2010-04-21 01:38:09 +08:00
|
|
|
pileupElems[StratifiedContextType.REVERSE.ordinal()].add(new ExtendedEventPileupElement(read,offset,length,bases));
|
2009-12-17 01:28:09 +08:00
|
|
|
} else {
|
2010-04-21 01:38:09 +08:00
|
|
|
pileupElems[StratifiedContextType.FORWARD.ordinal()].add(new ExtendedEventPileupElement(read,offset,length,bases));
|
2009-12-11 03:21:16 +08:00
|
|
|
}
|
2010-04-21 01:38:09 +08:00
|
|
|
pileupElems[StratifiedContextType.COMPLETE.ordinal()].add(new ExtendedEventPileupElement(read,offset,length,bases));
|
2009-12-11 03:21:16 +08:00
|
|
|
}
|
|
|
|
|
|
2010-04-21 01:38:09 +08:00
|
|
|
// public void add(ExtendedEventPileupElement p) {
|
|
|
|
|
// if ( ! isExtended ) throw new StingException("Can not add extended pileup element to the context holding simple events");
|
|
|
|
|
// SAMRecord read = p.getRead();
|
|
|
|
|
// if ( read.getReadNegativeStrandFlag() ) {
|
|
|
|
|
// pileupElems[StratifiedContextType.REVERSE.ordinal()].add(p);
|
|
|
|
|
// } else {
|
|
|
|
|
// pileupElems[StratifiedContextType.FORWARD.ordinal()].add(p);
|
|
|
|
|
// }
|
|
|
|
|
// pileupElems[StratifiedContextType.COMPLETE.ordinal()].add(p);
|
|
|
|
|
// }
|
|
|
|
|
|
2009-12-11 03:21:16 +08:00
|
|
|
/**
|
|
|
|
|
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample.
|
2010-01-08 13:40:42 +08:00
|
|
|
*
|
|
|
|
|
* @param pileup the original pileup
|
|
|
|
|
*
|
|
|
|
|
* @return a Map of sample name to StratifiedAlignmentContext
|
|
|
|
|
*
|
|
|
|
|
**/
|
|
|
|
|
public static Map<String, StratifiedAlignmentContext> splitContextBySample(ReadBackedPileup pileup) {
|
|
|
|
|
return splitContextBySample(pileup, null, null);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample.
|
2009-12-11 03:21:16 +08:00
|
|
|
*
|
2009-12-17 01:28:09 +08:00
|
|
|
* @param pileup the original pileup
|
2009-12-11 03:21:16 +08:00
|
|
|
* @param assumedSingleSample if not null, any read without a readgroup will be given this sample name
|
|
|
|
|
* @param collapseToThisSample if not null, all reads will be assigned this read group regardless of their actual read group
|
|
|
|
|
*
|
|
|
|
|
* @return a Map of sample name to StratifiedAlignmentContext
|
|
|
|
|
*
|
|
|
|
|
**/
|
2009-12-17 01:28:09 +08:00
|
|
|
public static Map<String, StratifiedAlignmentContext> splitContextBySample(ReadBackedPileup pileup, String assumedSingleSample, String collapseToThisSample) {
|
2009-12-11 03:21:16 +08:00
|
|
|
|
|
|
|
|
HashMap<String, StratifiedAlignmentContext> contexts = new HashMap<String, StratifiedAlignmentContext>();
|
2010-03-10 12:30:12 +08:00
|
|
|
GenomeLoc loc = pileup.getLocation();
|
2009-12-11 03:21:16 +08:00
|
|
|
|
2010-03-10 12:30:12 +08:00
|
|
|
for (PileupElement p : pileup )
|
|
|
|
|
addToContext(contexts, p, loc, assumedSingleSample, collapseToThisSample);
|
2009-12-11 03:21:16 +08:00
|
|
|
|
2010-03-10 12:30:12 +08:00
|
|
|
return contexts;
|
|
|
|
|
}
|
2009-12-11 03:21:16 +08:00
|
|
|
|
2010-03-10 12:30:12 +08:00
|
|
|
/**
|
|
|
|
|
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample.
|
|
|
|
|
*
|
|
|
|
|
* @param pileup the original pileup
|
|
|
|
|
*
|
|
|
|
|
* @return a Map of sample name to StratifiedAlignmentContext
|
|
|
|
|
*
|
|
|
|
|
**/
|
|
|
|
|
public static Map<String, StratifiedAlignmentContext> splitContextBySample(ReadBackedExtendedEventPileup pileup) {
|
|
|
|
|
return splitContextBySample(pileup, null, null);
|
|
|
|
|
}
|
2009-12-11 03:21:16 +08:00
|
|
|
|
2010-03-10 12:30:12 +08:00
|
|
|
/**
|
|
|
|
|
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample.
|
|
|
|
|
*
|
|
|
|
|
* @param pileup the original pileup
|
|
|
|
|
* @param assumedSingleSample if not null, any read without a readgroup will be given this sample name
|
|
|
|
|
* @param collapseToThisSample if not null, all reads will be assigned this read group regardless of their actual read group
|
|
|
|
|
*
|
|
|
|
|
* @return a Map of sample name to StratifiedAlignmentContext
|
|
|
|
|
*
|
|
|
|
|
**/
|
|
|
|
|
public static Map<String, StratifiedAlignmentContext> splitContextBySample(ReadBackedExtendedEventPileup pileup, String assumedSingleSample, String collapseToThisSample) {
|
|
|
|
|
|
|
|
|
|
HashMap<String, StratifiedAlignmentContext> contexts = new HashMap<String, StratifiedAlignmentContext>();
|
|
|
|
|
GenomeLoc loc = pileup.getLocation();
|
|
|
|
|
|
|
|
|
|
for (PileupElement p : pileup )
|
2010-04-21 01:38:09 +08:00
|
|
|
addToContext(contexts, p, loc, assumedSingleSample, collapseToThisSample,true);
|
2010-03-10 12:30:12 +08:00
|
|
|
|
|
|
|
|
return contexts;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static void addToContext(HashMap<String, StratifiedAlignmentContext> contexts, PileupElement p, GenomeLoc loc, String assumedSingleSample, String collapseToThisSample) {
|
2010-04-21 01:38:09 +08:00
|
|
|
addToContext(contexts, p, loc, assumedSingleSample, collapseToThisSample, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static void addToContext(HashMap<String, StratifiedAlignmentContext> contexts, PileupElement p, GenomeLoc loc, String assumedSingleSample, String collapseToThisSample, boolean isExtended) {
|
|
|
|
|
|
2010-03-10 12:30:12 +08:00
|
|
|
// get the read
|
|
|
|
|
SAMRecord read = p.getRead();
|
|
|
|
|
|
|
|
|
|
// find the sample
|
|
|
|
|
String sample;
|
|
|
|
|
if ( collapseToThisSample != null ) {
|
|
|
|
|
sample = collapseToThisSample;
|
|
|
|
|
} else {
|
|
|
|
|
SAMReadGroupRecord readGroup = read.getReadGroup();
|
|
|
|
|
if ( readGroup == null ) {
|
|
|
|
|
if ( assumedSingleSample == null )
|
|
|
|
|
throw new StingException("Missing read group for read " + read.getReadName());
|
|
|
|
|
sample = assumedSingleSample;
|
|
|
|
|
} else {
|
|
|
|
|
sample = readGroup.getSample();
|
2009-12-11 03:21:16 +08:00
|
|
|
}
|
2010-03-10 12:30:12 +08:00
|
|
|
}
|
2009-12-11 03:21:16 +08:00
|
|
|
|
2010-03-10 12:30:12 +08:00
|
|
|
// create a new context object if this is the first time we're seeing a read for this sample
|
|
|
|
|
StratifiedAlignmentContext myContext = contexts.get(sample);
|
|
|
|
|
if ( myContext == null ) {
|
2010-04-21 01:38:09 +08:00
|
|
|
myContext = new StratifiedAlignmentContext(loc,isExtended);
|
2010-03-10 12:30:12 +08:00
|
|
|
contexts.put(sample, myContext);
|
2009-12-11 03:21:16 +08:00
|
|
|
}
|
|
|
|
|
|
2010-03-10 12:30:12 +08:00
|
|
|
// add the read to this sample's context
|
|
|
|
|
// note that bad bases are added to the context (for DoC calculations later)
|
2010-04-21 01:38:09 +08:00
|
|
|
myContext.add(p);
|
2010-02-27 02:18:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Splits the given AlignmentContext into a StratifiedAlignmentContext per read group.
|
|
|
|
|
*
|
|
|
|
|
* @param pileup the original pileup
|
|
|
|
|
* @return a Map of sample name to StratifiedAlignmentContext
|
2010-03-10 12:30:12 +08:00
|
|
|
* TODO - support for collapsing or assuming read groups if they are missing
|
2010-02-27 02:18:38 +08:00
|
|
|
*
|
|
|
|
|
**/
|
|
|
|
|
public static Map<String,StratifiedAlignmentContext> splitContextByReadGroup(ReadBackedPileup pileup) {
|
|
|
|
|
HashMap<String,StratifiedAlignmentContext> contexts = new HashMap<String,StratifiedAlignmentContext>();
|
|
|
|
|
|
|
|
|
|
for ( PileupElement p : pileup ) {
|
|
|
|
|
SAMRecord read = p.getRead();
|
|
|
|
|
|
|
|
|
|
SAMReadGroupRecord readGroup = read.getReadGroup();
|
|
|
|
|
if ( readGroup == null ) {
|
|
|
|
|
throw new StingException("Missing read group for read " + read.getReadName());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
String group = readGroup.getReadGroupId();
|
|
|
|
|
|
|
|
|
|
StratifiedAlignmentContext myContext = contexts.get(group);
|
|
|
|
|
|
|
|
|
|
if ( myContext == null ) {
|
|
|
|
|
myContext = new StratifiedAlignmentContext(pileup.getLocation());
|
|
|
|
|
contexts.put(group,myContext);
|
|
|
|
|
}
|
|
|
|
|
|
2010-04-21 01:38:09 +08:00
|
|
|
myContext.add(p);
|
2010-02-27 02:18:38 +08:00
|
|
|
}
|
|
|
|
|
|
2009-12-11 03:21:16 +08:00
|
|
|
return contexts;
|
|
|
|
|
}
|
2010-03-29 05:45:22 +08:00
|
|
|
|
|
|
|
|
public static AlignmentContext joinContexts(Collection<StratifiedAlignmentContext> contexts, StratifiedContextType type) {
|
2010-04-21 01:38:09 +08:00
|
|
|
ArrayList<PileupElement> pe = new ArrayList();
|
2010-03-29 05:45:22 +08:00
|
|
|
|
2010-04-21 01:38:09 +08:00
|
|
|
if ( contexts.size() == 0 )
|
2010-03-29 05:45:22 +08:00
|
|
|
throw new StingException("BUG: joinContexts requires at least one context to join");
|
2010-04-21 01:38:09 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
Iterator<StratifiedAlignmentContext> it = contexts.iterator();
|
|
|
|
|
StratifiedAlignmentContext context = it.next();
|
|
|
|
|
boolean isExtended = context.isExtended;
|
|
|
|
|
GenomeLoc loc = context.getLocation();
|
|
|
|
|
pe.addAll(context.getPileupElements(type));
|
|
|
|
|
|
|
|
|
|
while ( it.hasNext()) {
|
|
|
|
|
context = it.next();
|
|
|
|
|
if ( ! loc.equals( context.getLocation() ) )
|
|
|
|
|
throw new StingException("Illegal attempt to join contexts from different genomic locations");
|
|
|
|
|
if ( context.isExtended != isExtended )
|
|
|
|
|
throw new StingException("Illegal attempt to join simple and extended contexts");
|
|
|
|
|
pe.addAll(context.getPileupElements(type));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// dirty trick below. generics do not allow to cast pe (ArrayList<PileupElement>) directly to ArrayList<ExtendedEventPileupElement>,
|
|
|
|
|
// so we first cast to "? extends" wildcard, then to what we actually need.
|
2010-06-13 09:47:02 +08:00
|
|
|
if ( isExtended ) return new AlignmentContext(loc, new UnifiedReadBackedExtendedEventPileup(loc, (ArrayList< ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)pe)) );
|
|
|
|
|
else return new AlignmentContext(loc, new UnifiedReadBackedPileup(loc,pe));
|
2010-03-29 05:45:22 +08:00
|
|
|
}
|
2009-12-11 03:21:16 +08:00
|
|
|
}
|