HaplotypeCaller is now an ActiveRegionWalker.
This commit is contained in:
parent
3b1aad4f17
commit
4d6312d4ea
|
|
@ -46,7 +46,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
|
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
|
||||||
|
|
||||||
final LocusView locusView = getLocusView( walker, dataProvider );
|
final LocusView locusView = getLocusView( walker, dataProvider );
|
||||||
final GenomeLocSortedSet initialIntervals = engine.getIntervals();
|
final GenomeLocSortedSet initialIntervals = engine.getIntervals(); // BUGBUG: unfortunate inefficiency that needs to be removed
|
||||||
|
|
||||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||||
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||||
|
|
@ -166,20 +166,22 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
bestRegion = otherRegionToTest;
|
bestRegion = otherRegionToTest;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bestRegion.add( (GATKSAMRecord) read, true );
|
bestRegion.add( (GATKSAMRecord) read );
|
||||||
|
|
||||||
// The read is also added to all other regions in which it overlaps but marked as non-primary
|
// The read is also added to all other regions in which it overlaps but marked as non-primary
|
||||||
if( !bestRegion.equals(activeRegion) ) {
|
if( walker.wantsNonPrimaryReads() ) {
|
||||||
activeRegion.add( (GATKSAMRecord) read, false );
|
if( !bestRegion.equals(activeRegion) ) {
|
||||||
}
|
activeRegion.add( (GATKSAMRecord) read );
|
||||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
}
|
||||||
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||||
activeRegion.add( (GATKSAMRecord) read, false );
|
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
||||||
|
activeRegion.add( (GATKSAMRecord) read );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
placedReads.add( read );
|
placedReads.add( read );
|
||||||
} else if( activeRegion.getExtendedLoc().overlapsP( readLoc ) ) {
|
} else if( activeRegion.getExtendedLoc().overlapsP( readLoc ) && walker.wantsNonPrimaryReads() ) {
|
||||||
activeRegion.add( (GATKSAMRecord) read, false );
|
activeRegion.add( (GATKSAMRecord) read );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
||||||
|
|
@ -207,7 +209,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||||
}
|
}
|
||||||
|
|
||||||
// integrate active regions into contiguous chunks based on active status
|
// integrate active regions into contiguous chunks with identical active status
|
||||||
private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<ActiveRegion> activeList ) {
|
private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<ActiveRegion> activeList ) {
|
||||||
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
|
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
|
||||||
if( activeList.size() == 0 ) {
|
if( activeList.size() == 0 ) {
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,10 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean wantsNonPrimaryReads() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine active status over the AlignmentContext
|
// Determine active status over the AlignmentContext
|
||||||
public abstract boolean isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
public abstract boolean isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,19 +0,0 @@
|
||||||
package org.broadinstitute.sting.utils.activeregion;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: rpoplin
|
|
||||||
* Date: 1/4/12
|
|
||||||
*/
|
|
||||||
|
|
||||||
public class ActiveRead {
|
|
||||||
final public GATKSAMRecord read;
|
|
||||||
final public boolean isPrimaryRegion;
|
|
||||||
|
|
||||||
ActiveRead( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
|
|
||||||
this.read = read;
|
|
||||||
this.isPrimaryRegion = isPrimaryRegion;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -16,8 +16,7 @@ import java.util.ArrayList;
|
||||||
|
|
||||||
public class ActiveRegion implements HasGenomeLocation {
|
public class ActiveRegion implements HasGenomeLocation {
|
||||||
|
|
||||||
private final ArrayList<ActiveRead> reads = new ArrayList<ActiveRead>();
|
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
|
||||||
private byte[] reference = null;
|
|
||||||
private final GenomeLoc activeRegionLoc;
|
private final GenomeLoc activeRegionLoc;
|
||||||
private final GenomeLoc extendedLoc;
|
private final GenomeLoc extendedLoc;
|
||||||
private final int extension;
|
private final int extension;
|
||||||
|
|
@ -35,28 +34,31 @@ public class ActiveRegion implements HasGenomeLocation {
|
||||||
}
|
}
|
||||||
|
|
||||||
// add each read to the bin and extend the reference genome activeRegionLoc if needed
|
// add each read to the bin and extend the reference genome activeRegionLoc if needed
|
||||||
public void add( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
|
public void add( final GATKSAMRecord read ) {
|
||||||
fullExtentReferenceLoc = fullExtentReferenceLoc.union( genomeLocParser.createGenomeLoc( read ) );
|
fullExtentReferenceLoc = fullExtentReferenceLoc.union( genomeLocParser.createGenomeLoc( read ) );
|
||||||
reads.add( new ActiveRead(read, isPrimaryRegion) );
|
reads.add( read );
|
||||||
}
|
}
|
||||||
|
|
||||||
public ArrayList<ActiveRead> getReads() { return reads; }
|
public ArrayList<GATKSAMRecord> getReads() { return reads; }
|
||||||
|
|
||||||
public byte[] getReference( final IndexedFastaSequenceFile referenceReader ) {
|
public byte[] getReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||||
// set up the reference if we haven't done so yet
|
return getReference( referenceReader, 0 );
|
||||||
if ( reference == null ) {
|
}
|
||||||
reference = referenceReader.getSubsequenceAt(fullExtentReferenceLoc.getContig(), fullExtentReferenceLoc.getStart(), fullExtentReferenceLoc.getStop()).getBases();
|
|
||||||
}
|
|
||||||
|
|
||||||
return reference;
|
public byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||||
|
return referenceReader.getSubsequenceAt( fullExtentReferenceLoc.getContig(),
|
||||||
|
Math.max(1, fullExtentReferenceLoc.getStart() - padding),
|
||||||
|
Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public GenomeLoc getLocation() { return activeRegionLoc; }
|
public GenomeLoc getLocation() { return activeRegionLoc; }
|
||||||
|
|
||||||
public GenomeLoc getExtendedLoc() { return extendedLoc; }
|
public GenomeLoc getExtendedLoc() { return extendedLoc; }
|
||||||
public GenomeLoc getReferenceLoc() { return fullExtentReferenceLoc; }
|
public GenomeLoc getReferenceLoc() { return fullExtentReferenceLoc; }
|
||||||
|
|
||||||
public int getExtension() { return extension; }
|
public int getExtension() { return extension; }
|
||||||
public int size() { return reads.size(); }
|
public int size() { return reads.size(); }
|
||||||
|
public void clearReads() { reads.clear(); }
|
||||||
|
public void remove( final GATKSAMRecord read ) { reads.remove( read ); }
|
||||||
|
public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }
|
||||||
}
|
}
|
||||||
|
|
@ -1,9 +1,15 @@
|
||||||
package org.broadinstitute.sting.utils.fragments;
|
package org.broadinstitute.sting.utils.fragments;
|
||||||
|
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -121,4 +127,63 @@ public class FragmentUtils {
|
||||||
return create(reads, reads.size(), SamRecordGetter);
|
return create(reads, reads.size(), SamRecordGetter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public final static List<GATKSAMRecord> mergeOverlappingPairedFragments( List<GATKSAMRecord> overlappingPair ) {
|
||||||
|
final byte MIN_QUAL_BAD_OVERLAP = 16;
|
||||||
|
if( overlappingPair.size() != 2 ) { throw new ReviewedStingException("Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); }
|
||||||
|
|
||||||
|
GATKSAMRecord firstRead = overlappingPair.get(0);
|
||||||
|
GATKSAMRecord secondRead = overlappingPair.get(1);
|
||||||
|
if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) {
|
||||||
|
firstRead = overlappingPair.get(1);
|
||||||
|
secondRead = overlappingPair.get(0);
|
||||||
|
}
|
||||||
|
if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) {
|
||||||
|
return overlappingPair; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A
|
||||||
|
}
|
||||||
|
if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) {
|
||||||
|
return overlappingPair; // fragments contain indels so don't merge them
|
||||||
|
}
|
||||||
|
|
||||||
|
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart());
|
||||||
|
|
||||||
|
final int firstReadStop = ( pair.getSecond() ? pair.getFirst() + 1 : pair.getFirst() );
|
||||||
|
final int numBases = firstReadStop + secondRead.getReadLength();
|
||||||
|
final byte[] bases = new byte[numBases];
|
||||||
|
final byte[] quals = new byte[numBases];
|
||||||
|
final byte[] firstReadBases = firstRead.getReadBases();
|
||||||
|
final byte[] firstReadQuals = firstRead.getBaseQualities();
|
||||||
|
final byte[] secondReadBases = secondRead.getReadBases();
|
||||||
|
final byte[] secondReadQuals = secondRead.getBaseQualities();
|
||||||
|
for(int iii = 0; iii < firstReadStop; iii++) {
|
||||||
|
bases[iii] = firstReadBases[iii];
|
||||||
|
quals[iii] = firstReadQuals[iii];
|
||||||
|
}
|
||||||
|
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
|
||||||
|
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
|
||||||
|
return overlappingPair;// high qual bases don't match exactly, probably indel in only one of the fragments, so don't merge them
|
||||||
|
}
|
||||||
|
bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] );
|
||||||
|
quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] );
|
||||||
|
}
|
||||||
|
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
|
||||||
|
bases[iii] = secondReadBases[iii-firstReadStop];
|
||||||
|
quals[iii] = secondReadQuals[iii-firstReadStop];
|
||||||
|
}
|
||||||
|
|
||||||
|
final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader());
|
||||||
|
returnRead.setAlignmentStart(firstRead.getUnclippedStart());
|
||||||
|
returnRead.setReadBases( bases );
|
||||||
|
returnRead.setBaseQualities( quals );
|
||||||
|
returnRead.setReadGroup( firstRead.getReadGroup() );
|
||||||
|
returnRead.setReferenceName( firstRead.getReferenceName() );
|
||||||
|
final CigarElement c = new CigarElement(bases.length, CigarOperator.M);
|
||||||
|
final ArrayList<CigarElement> cList = new ArrayList<CigarElement>();
|
||||||
|
cList.add(c);
|
||||||
|
returnRead.setCigar( new Cigar( cList ));
|
||||||
|
returnRead.setMappingQuality( firstRead.getMappingQuality() );
|
||||||
|
|
||||||
|
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>();
|
||||||
|
returnList.add(returnRead);
|
||||||
|
return returnList;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue