Moving trimToVariableRegion out of reduced reads and into a public static ReadClipper function. HaplotypeCaller clips reads to the active region boundries before passing to the HMM. The philosophy of the HC is moving towards genotyping the entire haplotype sequence contained within the active region as a single allele.
This commit is contained in:
parent
8db11eb781
commit
2836c161ee
|
|
@ -4,6 +4,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -38,14 +39,30 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
fullExtentReferenceLoc = fullExtentReferenceLoc.union( genomeLocParser.createGenomeLoc( read ) );
|
||||
reads.add( read );
|
||||
}
|
||||
|
||||
public void hardClipToActiveRegion() {
|
||||
final ArrayList<GATKSAMRecord> clippedReads = ReadClipper.hardClipToRegion( reads, activeRegionLoc.getStart(), activeRegionLoc.getStop() );
|
||||
reads.clear();
|
||||
reads.addAll(clippedReads);
|
||||
}
|
||||
|
||||
public ArrayList<GATKSAMRecord> getReads() { return reads; }
|
||||
|
||||
public byte[] getReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
return getReference( referenceReader, 0 );
|
||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
return getActiveRegionReference(referenceReader, 0);
|
||||
}
|
||||
|
||||
public byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
return referenceReader.getSubsequenceAt( activeRegionLoc.getContig(),
|
||||
Math.max(1, activeRegionLoc.getStart() - padding),
|
||||
Math.min(referenceReader.getSequenceDictionary().getSequence(activeRegionLoc.getContig()).getSequenceLength(), activeRegionLoc.getStop() + padding) ).getBases();
|
||||
}
|
||||
|
||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
return getFullReference(referenceReader, 0);
|
||||
}
|
||||
|
||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
return referenceReader.getSubsequenceAt( fullExtentReferenceLoc.getContig(),
|
||||
Math.max(1, fullExtentReferenceLoc.getStart() - padding),
|
||||
Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases();
|
||||
|
|
|
|||
|
|
@ -312,6 +312,42 @@ public class ReadClipper {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Hard clip the read to the variable region (from refStart to refStop)
|
||||
*
|
||||
* @param read the read to be clipped
|
||||
* @param refStart the beginning of the variant region (inclusive)
|
||||
* @param refStop the end of the variant region (inclusive)
|
||||
* @return the read hard clipped to the variant region
|
||||
*/
|
||||
public static GATKSAMRecord hardClipToRegion( final GATKSAMRecord read, final int refStart, final int refStop ) {
|
||||
final int start = read.getAlignmentStart();
|
||||
final int stop = read.getAlignmentEnd();
|
||||
|
||||
// check if the read is contained in region
|
||||
if (start <= refStop && stop >= refStart) {
|
||||
if (start < refStart && stop > refStop)
|
||||
return hardClipBothEndsByReferenceCoordinates(read, refStart - 1, refStop + 1);
|
||||
else if (start < refStart)
|
||||
return hardClipByReferenceCoordinatesLeftTail(read, refStart - 1);
|
||||
else if (stop > refStop)
|
||||
return hardClipByReferenceCoordinatesRightTail(read, refStop + 1);
|
||||
return read;
|
||||
} else
|
||||
return GATKSAMRecord.emptyRead(read);
|
||||
|
||||
}
|
||||
public static ArrayList<GATKSAMRecord> hardClipToRegion( final ArrayList<GATKSAMRecord> reads, final int refStart, final int refStop ) {
|
||||
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>( reads.size() );
|
||||
for( final GATKSAMRecord read : reads ) {
|
||||
final GATKSAMRecord clippedRead = hardClipToRegion( read, refStart, refStop );
|
||||
if( !clippedRead.isEmpty() ) {
|
||||
returnList.add( clippedRead );
|
||||
}
|
||||
}
|
||||
return returnList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a read contains adaptor sequences. If it does, hard clips them out.
|
||||
*
|
||||
|
|
|
|||
Loading…
Reference in New Issue