HaplotypeCaller is now an ActiveRegionWalker.
This commit is contained in:
parent
3b1aad4f17
commit
4d6312d4ea
|
|
@ -46,7 +46,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
|
||||
|
||||
final LocusView locusView = getLocusView( walker, dataProvider );
|
||||
final GenomeLocSortedSet initialIntervals = engine.getIntervals();
|
||||
final GenomeLocSortedSet initialIntervals = engine.getIntervals(); // BUGBUG: unfortunate inefficiency that needs to be removed
|
||||
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
|
|
@ -166,20 +166,22 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
bestRegion = otherRegionToTest;
|
||||
}
|
||||
}
|
||||
bestRegion.add( (GATKSAMRecord) read, true );
|
||||
bestRegion.add( (GATKSAMRecord) read );
|
||||
|
||||
// The read is also added to all other regions in which it overlaps but marked as non-primary
|
||||
if( !bestRegion.equals(activeRegion) ) {
|
||||
activeRegion.add( (GATKSAMRecord) read, false );
|
||||
}
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
||||
activeRegion.add( (GATKSAMRecord) read, false );
|
||||
if( walker.wantsNonPrimaryReads() ) {
|
||||
if( !bestRegion.equals(activeRegion) ) {
|
||||
activeRegion.add( (GATKSAMRecord) read );
|
||||
}
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
||||
activeRegion.add( (GATKSAMRecord) read );
|
||||
}
|
||||
}
|
||||
}
|
||||
placedReads.add( read );
|
||||
} else if( activeRegion.getExtendedLoc().overlapsP( readLoc ) ) {
|
||||
activeRegion.add( (GATKSAMRecord) read, false );
|
||||
} else if( activeRegion.getExtendedLoc().overlapsP( readLoc ) && walker.wantsNonPrimaryReads() ) {
|
||||
activeRegion.add( (GATKSAMRecord) read );
|
||||
}
|
||||
}
|
||||
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
||||
|
|
@ -207,7 +209,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||
}
|
||||
|
||||
// integrate active regions into contiguous chunks based on active status
|
||||
// integrate active regions into contiguous chunks with identical active status
|
||||
private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<ActiveRegion> activeList ) {
|
||||
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
|
||||
if( activeList.size() == 0 ) {
|
||||
|
|
|
|||
|
|
@ -37,6 +37,10 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
|||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
public boolean wantsNonPrimaryReads() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Determine active status over the AlignmentContext
|
||||
public abstract boolean isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,19 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 1/4/12
|
||||
*/
|
||||
|
||||
public class ActiveRead {
|
||||
final public GATKSAMRecord read;
|
||||
final public boolean isPrimaryRegion;
|
||||
|
||||
ActiveRead( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
|
||||
this.read = read;
|
||||
this.isPrimaryRegion = isPrimaryRegion;
|
||||
}
|
||||
}
|
||||
|
|
@ -16,8 +16,7 @@ import java.util.ArrayList;
|
|||
|
||||
public class ActiveRegion implements HasGenomeLocation {
|
||||
|
||||
private final ArrayList<ActiveRead> reads = new ArrayList<ActiveRead>();
|
||||
private byte[] reference = null;
|
||||
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
|
||||
private final GenomeLoc activeRegionLoc;
|
||||
private final GenomeLoc extendedLoc;
|
||||
private final int extension;
|
||||
|
|
@ -35,28 +34,31 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
}
|
||||
|
||||
// add each read to the bin and extend the reference genome activeRegionLoc if needed
|
||||
public void add( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
|
||||
public void add( final GATKSAMRecord read ) {
|
||||
fullExtentReferenceLoc = fullExtentReferenceLoc.union( genomeLocParser.createGenomeLoc( read ) );
|
||||
reads.add( new ActiveRead(read, isPrimaryRegion) );
|
||||
reads.add( read );
|
||||
}
|
||||
|
||||
public ArrayList<ActiveRead> getReads() { return reads; }
|
||||
public ArrayList<GATKSAMRecord> getReads() { return reads; }
|
||||
|
||||
public byte[] getReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
// set up the reference if we haven't done so yet
|
||||
if ( reference == null ) {
|
||||
reference = referenceReader.getSubsequenceAt(fullExtentReferenceLoc.getContig(), fullExtentReferenceLoc.getStart(), fullExtentReferenceLoc.getStop()).getBases();
|
||||
}
|
||||
return getReference( referenceReader, 0 );
|
||||
}
|
||||
|
||||
return reference;
|
||||
public byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
return referenceReader.getSubsequenceAt( fullExtentReferenceLoc.getContig(),
|
||||
Math.max(1, fullExtentReferenceLoc.getStart() - padding),
|
||||
Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases();
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() { return activeRegionLoc; }
|
||||
|
||||
public GenomeLoc getExtendedLoc() { return extendedLoc; }
|
||||
public GenomeLoc getReferenceLoc() { return fullExtentReferenceLoc; }
|
||||
|
||||
public int getExtension() { return extension; }
|
||||
public int size() { return reads.size(); }
|
||||
public void clearReads() { reads.clear(); }
|
||||
public void remove( final GATKSAMRecord read ) { reads.remove( read ); }
|
||||
public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }
|
||||
}
|
||||
|
|
@ -1,9 +1,15 @@
|
|||
package org.broadinstitute.sting.utils.fragments;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -121,4 +127,63 @@ public class FragmentUtils {
|
|||
return create(reads, reads.size(), SamRecordGetter);
|
||||
}
|
||||
|
||||
public final static List<GATKSAMRecord> mergeOverlappingPairedFragments( List<GATKSAMRecord> overlappingPair ) {
|
||||
final byte MIN_QUAL_BAD_OVERLAP = 16;
|
||||
if( overlappingPair.size() != 2 ) { throw new ReviewedStingException("Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); }
|
||||
|
||||
GATKSAMRecord firstRead = overlappingPair.get(0);
|
||||
GATKSAMRecord secondRead = overlappingPair.get(1);
|
||||
if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) {
|
||||
firstRead = overlappingPair.get(1);
|
||||
secondRead = overlappingPair.get(0);
|
||||
}
|
||||
if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) {
|
||||
return overlappingPair; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A
|
||||
}
|
||||
if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) {
|
||||
return overlappingPair; // fragments contain indels so don't merge them
|
||||
}
|
||||
|
||||
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart());
|
||||
|
||||
final int firstReadStop = ( pair.getSecond() ? pair.getFirst() + 1 : pair.getFirst() );
|
||||
final int numBases = firstReadStop + secondRead.getReadLength();
|
||||
final byte[] bases = new byte[numBases];
|
||||
final byte[] quals = new byte[numBases];
|
||||
final byte[] firstReadBases = firstRead.getReadBases();
|
||||
final byte[] firstReadQuals = firstRead.getBaseQualities();
|
||||
final byte[] secondReadBases = secondRead.getReadBases();
|
||||
final byte[] secondReadQuals = secondRead.getBaseQualities();
|
||||
for(int iii = 0; iii < firstReadStop; iii++) {
|
||||
bases[iii] = firstReadBases[iii];
|
||||
quals[iii] = firstReadQuals[iii];
|
||||
}
|
||||
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
|
||||
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
|
||||
return overlappingPair;// high qual bases don't match exactly, probably indel in only one of the fragments, so don't merge them
|
||||
}
|
||||
bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] );
|
||||
quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] );
|
||||
}
|
||||
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
|
||||
bases[iii] = secondReadBases[iii-firstReadStop];
|
||||
quals[iii] = secondReadQuals[iii-firstReadStop];
|
||||
}
|
||||
|
||||
final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader());
|
||||
returnRead.setAlignmentStart(firstRead.getUnclippedStart());
|
||||
returnRead.setReadBases( bases );
|
||||
returnRead.setBaseQualities( quals );
|
||||
returnRead.setReadGroup( firstRead.getReadGroup() );
|
||||
returnRead.setReferenceName( firstRead.getReferenceName() );
|
||||
final CigarElement c = new CigarElement(bases.length, CigarOperator.M);
|
||||
final ArrayList<CigarElement> cList = new ArrayList<CigarElement>();
|
||||
cList.add(c);
|
||||
returnRead.setCigar( new Cigar( cList ));
|
||||
returnRead.setMappingQuality( firstRead.getMappingQuality() );
|
||||
|
||||
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>();
|
||||
returnList.add(returnRead);
|
||||
return returnList;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue