Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Matt Hanna 2011-12-20 21:43:57 -05:00
commit 4d65aefc7b
18 changed files with 438 additions and 661 deletions

View File

@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReservoirDownsampler;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -432,7 +431,7 @@ public class LocusIteratorByState extends LocusIterator {
while(iterator.hasNext()) {
SAMRecordState state = iterator.next();
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
if ( filterBaseInRead(state.getRead(), location.getStart()) ) {
if ( filterBaseInRead((GATKSAMRecord) state.getRead(), location.getStart()) ) {
//discarded_bases++;
//printStatus("Adaptor bases", discarded_adaptor_bases);
continue;
@ -481,8 +480,8 @@ public class LocusIteratorByState extends LocusIterator {
* @param pos
* @return
*/
private static boolean filterBaseInRead(SAMRecord rec, long pos) {
return ReadUtils.readPairBaseOverlapType(rec, pos) == ReadUtils.OverlapType.IN_ADAPTOR;
private static boolean filterBaseInRead(GATKSAMRecord rec, long pos) {
return ReadUtils.isBaseInsideAdaptor(rec, pos);
}
private void updateReadStates() {

View File

@ -299,9 +299,8 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
*/
public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
read = ReadUtils.replaceSoftClipsWithMatches(read);
}
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES )
read = (new ReadClipper(read)).revertSoftClippedBases();
ReadClipperWithData clipper = new ReadClipperWithData(read, sequencesToClip);
//

View File

@ -81,7 +81,7 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
for ( SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) {
final String sample = readGroup.getSample();
if ( ! headers.containsKey(sample) ) {
SAMFileHeader header = ReadUtils.copySAMFileHeader(this.getToolkit().getSAMFileHeader());
SAMFileHeader header = duplicateSAMFileHeader(this.getToolkit().getSAMFileHeader());
logger.debug(String.format("Creating BAM header for sample %s", sample));
ArrayList<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
header.setReadGroups(readGroups);
@ -121,4 +121,20 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
return outputs;
}
/**
 * Builds an independent SAMFileHeader carrying the same sort order, group order,
 * program records, read groups, sequence dictionary and attributes as the input.
 *
 * NOTE(review): the program-record and read-group lists appear to be shared with the
 * source header rather than deep-copied — callers that mutate them (as this walker
 * does via setReadGroups) should install fresh lists first; confirm against the
 * samtools SAMFileHeader setters.
 *
 * @param toCopy the header to duplicate
 * @return a new header populated from {@code toCopy}
 */
public static SAMFileHeader duplicateSAMFileHeader(SAMFileHeader toCopy) {
    final SAMFileHeader duplicate = new SAMFileHeader();
    duplicate.setSortOrder(toCopy.getSortOrder());
    duplicate.setGroupOrder(toCopy.getGroupOrder());
    duplicate.setProgramRecords(toCopy.getProgramRecords());
    duplicate.setReadGroups(toCopy.getReadGroups());
    duplicate.setSequenceDictionary(toCopy.getSequenceDictionary());
    // attributes have no bulk setter, so transfer them one entry at a time
    for (final Map.Entry<String, String> attribute : toCopy.getAttributes())
        duplicate.setAttribute(attribute.getKey(), attribute.getValue());
    return duplicate;
}
}

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
@ -125,7 +126,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) {
//SAMRecord read = p.getRead();
GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read == null)
continue;
if(ReadUtils.is454Read(read)) {

View File

@ -463,7 +463,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
private void emitReadLists() {
// pre-merge lists to sort them in preparation for constrained SAMFileWriter
readsNotToClean.addAll(readsToClean.getReads());
ReadUtils.coordinateSortReads(readsNotToClean);
ReadUtils.sortReadsByCoordinate(readsNotToClean);
manager.addReads(readsNotToClean, readsActuallyCleaned);
readsToClean.clear();
readsNotToClean.clear();

View File

@ -32,6 +32,7 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -409,7 +410,7 @@ public class PairHMMIndelErrorModel {
}
else {
//System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
SAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
SAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read == null)
continue;

View File

@ -50,6 +50,44 @@ public class ReadClipper {
return read;
}
/**
 * Return a new read corresponding to this.read that's been clipped according to ops, if any are present.
 *
 * Operates on a clone of the underlying read, so the original is left untouched. Ops whose
 * start lies beyond the (possibly already shortened) read are skipped; ops whose stop
 * overruns the read are truncated to the last base before being applied.
 *
 * @param algorithm how the clipped bases should be represented (e.g. HARDCLIP_BASES,
 *                  REVERT_SOFTCLIPPED_BASES)
 * @return the clipped read; the unmodified read when no ops are queued; or an empty
 *         GATKSAMRecord (header only) when clipping removed every base
 */
public GATKSAMRecord clipRead(ClippingRepresentation algorithm) {
    if (ops == null)
        return getRead();
    else {
        try {
            // work on a clone so the caller's read is never mutated
            GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone();
            for (ClippingOp op : getOps()) {
                //check if the clipped read can still be clipped in the range requested
                if (op.start < clippedRead.getReadLength()) {
                    ClippingOp fixedOperation = op;
                    // truncate an op that runs past the end of the (shrinking) read
                    if (op.stop >= clippedRead.getReadLength())
                        fixedOperation = new ClippingOp(op.start, clippedRead.getReadLength() - 1);
                    clippedRead = fixedOperation.apply(algorithm, clippedRead);
                }
            }
            wasClipped = true;
            ops.clear();  // ops are consumed: a subsequent clipRead() returns the read unchanged
            if ( clippedRead.isEmpty() )
                return new GATKSAMRecord( clippedRead.getHeader() );
            return clippedRead;
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e); // this should never happen
        }
    }
}
// QUICK USE UTILITY FUNCTION
/**
 * Hard clips the left tail of the read through reference coordinate refStop.
 * The -1 start coordinate marks the unbounded side of the clip, mirroring the
 * right-tail variant which passes -1 as its stop coordinate.
 *
 * @param refStop the last reference coordinate (inclusive) of the left tail to clip
 * @return the hard-clipped read
 */
public GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
    return hardClipByReferenceCoordinates(-1, refStop);
}
@ -163,39 +201,13 @@ public class ReadClipper {
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
public GATKSAMRecord hardClipAdaptorSequence () {
final Integer adaptorBoundary = ReadUtils.getAdaptorBoundary(read);
if (adaptorBoundary == null || !ReadUtils.isInsideRead(read, adaptorBoundary))
return read;
/**
* Return a new read corresponding to this.read that's been clipped according to ops, if any are present.
*
* @param algorithm
* @return
*/
public GATKSAMRecord clipRead(ClippingRepresentation algorithm) {
if (ops == null)
return getRead();
else {
try {
GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone();
for (ClippingOp op : getOps()) {
//check if the clipped read can still be clipped in the range requested
if (op.start < clippedRead.getReadLength()) {
ClippingOp fixedOperation = op;
if (op.stop >= clippedRead.getReadLength())
fixedOperation = new ClippingOp(op.start, clippedRead.getReadLength() - 1);
clippedRead = fixedOperation.apply(algorithm, clippedRead);
}
}
wasClipped = true;
ops.clear();
if ( clippedRead.isEmpty() )
return new GATKSAMRecord( clippedRead.getHeader() );
return clippedRead;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e); // this should never happen
}
}
return read.getReadNegativeStrandFlag() ? hardClipByReferenceCoordinatesLeftTail(adaptorBoundary) : hardClipByReferenceCoordinatesRightTail(adaptorBoundary);
}
public GATKSAMRecord hardClipLeadingInsertions() {
@ -218,4 +230,44 @@ public class ReadClipper {
this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates
return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
}
// STATIC VERSIONS OF THE QUICK CLIPPING FUNCTIONS
// Each helper wraps the read in a throwaway ReadClipper and delegates to the matching
// instance method, so callers can clip in a single expression without managing a clipper.

/** Hard clips the left tail by reference coordinates, through refStop (−1 marks the open side). */
public static GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(GATKSAMRecord read, int refStop) {
    return (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, refStop);
}

/** Hard clips the right tail by reference coordinates, from refStart onwards (−1 marks the open side). */
public static GATKSAMRecord hardClipByReferenceCoordinatesRightTail(GATKSAMRecord read, int refStart) {
    return (new ReadClipper(read)).hardClipByReferenceCoordinates(refStart, -1);
}

/** Hard clips the bases between read (not reference) coordinates start and stop. */
public static GATKSAMRecord hardClipByReadCoordinates(GATKSAMRecord read, int start, int stop) {
    return (new ReadClipper(read)).hardClipByReadCoordinates(start, stop);
}

/** Hard clips both ends of the read by reference coordinates. */
public static GATKSAMRecord hardClipBothEndsByReferenceCoordinates(GATKSAMRecord read, int left, int right) {
    return (new ReadClipper(read)).hardClipBothEndsByReferenceCoordinates(left, right);
}

/** Hard clips low-quality ends of the read, using lowQual as the quality threshold. */
public static GATKSAMRecord hardClipLowQualEnds(GATKSAMRecord read, byte lowQual) {
    return (new ReadClipper(read)).hardClipLowQualEnds(lowQual);
}

/** Converts the read's soft-clipped bases into hard clips. */
public static GATKSAMRecord hardClipSoftClippedBases (GATKSAMRecord read) {
    return (new ReadClipper(read)).hardClipSoftClippedBases();
}

/** Hard clips any portion of the read that extends into its mate's adaptor sequence. */
public static GATKSAMRecord hardClipAdaptorSequence (GATKSAMRecord read) {
    return (new ReadClipper(read)).hardClipAdaptorSequence();
}

/** Hard clips insertions at the leading edge of the read. */
public static GATKSAMRecord hardClipLeadingInsertions(GATKSAMRecord read) {
    return (new ReadClipper(read)).hardClipLeadingInsertions();
}

/** Restores previously soft-clipped bases to ordinary aligned bases. */
public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
    return (new ReadClipper(read)).revertSoftClippedBases();
}
}

View File

@ -24,10 +24,8 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.BAMRecord;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import com.google.java.contract.Ensures;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.NGSPlatform;
import java.util.HashMap;
@ -273,5 +271,52 @@ public class GATKSAMRecord extends BAMRecord {
setReadGroup(rg);
}
/**
 * Calculates the reference coordinate for the beginning of the read taking into account
 * soft clips but not hard clips.
 *
 * Note: getUnclippedStart() backs up over both soft and hard leading clips; this method
 * re-adds only the leading hard-clip lengths, so the result accounts for soft clips alone.
 *
 * @return the unclipped start of the read taking soft clips (but not hard clips) into account
 */
@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
public int getSoftStart() {
    int softStart = this.getUnclippedStart();
    for (final CigarElement element : this.getCigar().getCigarElements()) {
        // hard clips can only appear at the edges, so stop at the first non-hard-clip element
        if (element.getOperator() != CigarOperator.HARD_CLIP)
            break;
        softStart += element.getLength();
    }
    return softStart;
}
/**
 * Calculates the reference coordinate for the end of the read taking into account soft clips but not hard clips.
 *
 * Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips.
 *
 * @return the unclipped end of the read taking soft clips (but not hard clips) into account
 */
@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
public int getSoftEnd() {
    // Walks the cigar from the unclipped start, summing reference-consuming elements plus
    // soft/hard clips. The add is deferred by one element (via "shift") so that a TRAILING
    // hard clip's length is never folded into the total.
    int stop = this.getUnclippedStart();
    if (ReadUtils.readIsEntirelyInsertion(this))
        return stop;  // degenerate cigar: nothing consumes reference bases
    int shift = 0;
    CigarOperator lastOperator = null;
    for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
        stop += shift;  // commit the previous element's pending length
        lastOperator = cigarElement.getOperator();
        if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
            shift = cigarElement.getLength();
        else
            shift = 0;
    }
    // A trailing hard clip's pending length is dropped; otherwise the last element is
    // committed. The -1 converts an exclusive end into an inclusive coordinate.
    return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
}

View File

@ -45,84 +45,35 @@ import java.util.*;
public class ReadUtils {
private ReadUtils() { }
// ----------------------------------------------------------------------------------------------------
//
// Reduced read utilities
//
// ----------------------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------------------
//
// General utilities
//
// ----------------------------------------------------------------------------------------------------
public static SAMFileHeader copySAMFileHeader(SAMFileHeader toCopy) {
SAMFileHeader copy = new SAMFileHeader();
copy.setSortOrder(toCopy.getSortOrder());
copy.setGroupOrder(toCopy.getGroupOrder());
copy.setProgramRecords(toCopy.getProgramRecords());
copy.setReadGroups(toCopy.getReadGroups());
copy.setSequenceDictionary(toCopy.getSequenceDictionary());
for (Map.Entry<String, String> e : toCopy.getAttributes())
copy.setAttribute(e.getKey(), e.getValue());
return copy;
}
public static SAMFileWriter createSAMFileWriterWithCompression(SAMFileHeader header, boolean presorted, String file, int compression) {
if (file.endsWith(".bam"))
return new SAMFileWriterFactory().makeBAMWriter(header, presorted, new File(file), compression);
return new SAMFileWriterFactory().makeSAMOrBAMWriter(header, presorted, new File(file));
}
/**
 * Checks whether the read's read group declares a platform ("PL" attribute) containing
 * the given name. Reads without a read group, or whose read group has no PL attribute,
 * are reported as not matching.
 *
 * @param read the read to test
 * @param name platform substring to look for; compared against the upper-cased PL value,
 *             so callers should pass it in upper case
 * @return true iff the PL attribute exists and its upper-cased value contains {@code name}
 */
public static boolean isPlatformRead(SAMRecord read, String name) {
    final SAMReadGroupRecord group = read.getReadGroup();
    if (group == null)
        return false;
    final Object platform = group.getAttribute("PL");
    return platform != null && platform.toString().toUpperCase().contains(name);
}
// ---------------------------------------------------------------------------------------------------------
//
// utilities for detecting overlapping reads
//
// ---------------------------------------------------------------------------------------------------------
private static int DEFAULT_ADAPTOR_SIZE = 100;
/**
* Detects read pairs where the reads are so long relative to the over fragment size that they are
* reading into each other's adaptors.
*
* Normally, fragments are sufficiently far apart that reads aren't reading into each other.
*
* |--------------------> first read
* <--------------------| second read
*
* Sometimes, mostly due to lab errors or constraints, fragment library are made too short relative to the
* length of the reads. For example, it's possible to have 76bp PE reads with 125 bp inserts, so that ~25 bp of each
* read overlaps with its mate.
*
* |--------OOOOOOOOOOOO> first read
* <OOOOOOOOOOOO------------| second read
*
* This filter deals with the situation where the fragment is so small that the each read actually reads into the
* adaptor sequence of its mate, generating mismatches at both ends of the read:
*
* |----------------XXXX> first read
* <XXXX----------------| second read
*
* The code below returns NOT_OVERLAPPING for the first case, IN_ADAPTOR for the last case, and OVERLAPPING
* given a read and a reference aligned base position.
*
* @author depristo
* @version 0.1
* A marker to tell which end of the read has been clipped
*/
/** A marker to tell which end of the read has been clipped. */
public enum ClippingTail {
    LEFT_TAIL,   // the clip applies to the left end of the read
    RIGHT_TAIL   // the clip applies to the right end of the read
}
/** Result of a read-pair base overlap test: the base is either clear of, or inside, the mate's adaptor. */
public enum OverlapType { NOT_OVERLAPPING, IN_ADAPTOR}
/**
 * A HashMap of the SAM spec read flag names, keyed by the FLAG bit value.
 *
 * Note: This is not being used right now, but can be useful in the future
 */
private static final Map<Integer, String> readFlagNames = new HashMap<Integer, String>();
static {
    readFlagNames.put(0x1, "Paired");
    readFlagNames.put(0x2, "Proper");
    readFlagNames.put(0x4, "Unmapped");
    readFlagNames.put(0x8, "MateUnmapped");
    readFlagNames.put(0x10, "Forward");
    //readFlagNames.put(0x20, "MateForward");  // deliberately left unmapped in the original
    readFlagNames.put(0x40, "FirstOfPair");
    readFlagNames.put(0x80, "SecondOfPair");
    readFlagNames.put(0x100, "NotPrimary");
    readFlagNames.put(0x200, "NON-PF");
    readFlagNames.put(0x400, "Duplicate");
}
/**
* This enum represents all the different ways in which a read can overlap an interval.
@ -169,448 +120,133 @@ public class ReadUtils {
public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
/**
* God, there's a huge information asymmetry in SAM format:
* Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular
* SAMFileWriter without compression otherwise.
*
* s1 e1
* |-----------------------> [record in hand]
* s2
* <-----------------------|
*
* s1, e1, and s2 are all in the record. From isize we can can compute e2 as s1 + isize + 1
*
* s2
* |----------------------->
* s1 e1
* <-----------------------| [record in hand]
*
* Here we cannot calculate e2 since the record carries s2 and e1 + isize is s2 now!
*
* This makes the following code a little nasty, since we can only detect if a base is in the adaptor, but not
* if it overlaps the read.
*
* @param read
* @param basePos
* @param adaptorLength
* @return
* @param header
* @param presorted
* @param file
* @param compression
* @return a SAMFileWriter with the compression level if it is a bam.
*/
public static OverlapType readPairBaseOverlapType(final SAMRecord read, long basePos, final int adaptorLength) {
OverlapType state = OverlapType.NOT_OVERLAPPING;
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength);
if ( adaptorBoundaries != null ) { // we're not an unmapped pair -- cannot filter out
boolean inAdapator = basePos >= adaptorBoundaries.first && basePos <= adaptorBoundaries.second;
if ( inAdapator ) {
state = OverlapType.IN_ADAPTOR;
//System.out.printf("baseOverlapState: %50s negStrand=%b base=%d start=%d stop=%d, adaptorStart=%d adaptorEnd=%d isize=%d => %s%n",
// read.getReadName(), read.getReadNegativeStrandFlag(), basePos, read.getAlignmentStart(), read.getAlignmentEnd(), adaptorBoundaries.first, adaptorBoundaries.second, read.getInferredInsertSize(), state);
}
}
return state;
}
private static Pair<Integer, Integer> getAdaptorBoundaries(SAMRecord read, int adaptorLength) {
int isize = read.getInferredInsertSize();
if ( isize == 0 )
return null; // don't worry about unmapped pairs
int adaptorStart, adaptorEnd;
if ( read.getReadNegativeStrandFlag() ) {
// we are on the negative strand, so our mate is on the positive strand
int mateStart = read.getMateAlignmentStart();
adaptorStart = mateStart - adaptorLength - 1;
adaptorEnd = mateStart - 1;
} else {
// we are on the positive strand, so our mate is on the negative strand
int mateEnd = read.getAlignmentStart() + isize - 1;
adaptorStart = mateEnd + 1;
adaptorEnd = mateEnd + adaptorLength;
}
return new Pair<Integer, Integer>(adaptorStart, adaptorEnd);
/**
 * Creates a SAMFileWriter, honoring the requested compression level when the target is a
 * ".bam" file. Any other extension falls back to the factory's default SAM/BAM writer,
 * for which the compression argument is ignored.
 *
 * @param header      header for the new file
 * @param presorted   whether records will be added to the writer already sorted
 * @param file        output path; a ".bam" suffix selects the compressed BAM writer
 * @param compression BAM compression level (unused for non-bam outputs)
 * @return a writer for the requested file
 */
public static SAMFileWriter createSAMFileWriterWithCompression(SAMFileHeader header, boolean presorted, String file, int compression) {
    final File output = new File(file);
    final SAMFileWriterFactory factory = new SAMFileWriterFactory();
    return file.endsWith(".bam")
            ? factory.makeBAMWriter(header, presorted, output, compression)
            : factory.makeSAMOrBAMWriter(header, presorted, output);
}
/**
* is this base inside the adaptor of the read?
*
* @param read original SAM record
* @param adaptorLength length of adaptor sequence
* @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped
* There are two cases to treat here:
*
* 1) Read is in the negative strand => Adaptor boundary is on the left tail
* 2) Read is in the positive strand => Adaptor boundary is on the right tail
*
* Note: We return false to all reads that are UNMAPPED or have an weird big insert size (probably due to mismapping or bigger event)
*
* @param read the read to test
* @param basePos base position in REFERENCE coordinates (not read coordinates)
* @return whether or not the base is in the adaptor
*/
public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read, int adaptorLength) {
public static boolean isBaseInsideAdaptor(final GATKSAMRecord read, long basePos) {
Integer adaptorBoundary = getAdaptorBoundary(read);
if (adaptorBoundary == null || read.getInferredInsertSize() > DEFAULT_ADAPTOR_SIZE)
return false;
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength);
GATKSAMRecord result = read;
if ( adaptorBoundaries != null ) {
if ( read.getReadNegativeStrandFlag() && adaptorBoundaries.second >= read.getAlignmentStart() && adaptorBoundaries.first < read.getAlignmentEnd() )
result = hardClipStartOfRead(read, adaptorBoundaries.second);
else if ( !read.getReadNegativeStrandFlag() && adaptorBoundaries.first <= read.getAlignmentEnd() )
result = hardClipEndOfRead(read, adaptorBoundaries.first);
}
return result;
}
// return true if the read needs to be completely clipped
private static GATKSAMRecord hardClipStartOfRead(GATKSAMRecord oldRec, int stopPosition) {
if ( stopPosition >= oldRec.getAlignmentEnd() ) {
// BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it
//System.out.printf("Entire read needs to be clipped: %50s %n", read.getReadName());
return null;
}
GATKSAMRecord read;
try {
read = (GATKSAMRecord)oldRec.clone();
} catch (Exception e) {
return null;
}
//System.out.printf("Clipping start of read: %50s start=%d adaptorEnd=%d isize=%d %n",
// read.getReadName(), read.getAlignmentStart(), stopPosition, read.getInferredInsertSize());
Cigar oldCigar = read.getCigar();
LinkedList<CigarElement> newCigarElements = new LinkedList<CigarElement>();
int currentPos = read.getAlignmentStart();
int basesToClip = 0;
int basesAlreadyClipped = 0;
for ( CigarElement ce : oldCigar.getCigarElements() ) {
if ( currentPos > stopPosition) {
newCigarElements.add(ce);
continue;
}
int elementLength = ce.getLength();
switch ( ce.getOperator() ) {
case M:
for (int i = 0; i < elementLength; i++, currentPos++, basesToClip++) {
if ( currentPos > stopPosition ) {
newCigarElements.add(new CigarElement(elementLength - i, CigarOperator.M));
break;
}
}
break;
case I:
case S:
basesToClip += elementLength;
break;
case D:
case N:
currentPos += elementLength;
break;
case H:
basesAlreadyClipped += elementLength;
case P:
break;
default: throw new ReviewedStingException("The " + ce.getOperator() + " cigar element is not currently supported");
}
}
// copy over the unclipped bases
final byte[] bases = read.getReadBases();
final byte[] quals = read.getBaseQualities();
int newLength = bases.length - basesToClip;
byte[] newBases = new byte[newLength];
byte[] newQuals = new byte[newLength];
System.arraycopy(bases, basesToClip, newBases, 0, newLength);
System.arraycopy(quals, basesToClip, newQuals, 0, newLength);
read.setReadBases(newBases);
read.setBaseQualities(newQuals);
// now add a CIGAR element for the clipped bases
newCigarElements.addFirst(new CigarElement(basesToClip + basesAlreadyClipped, CigarOperator.H));
Cigar newCigar = new Cigar(newCigarElements);
read.setCigar(newCigar);
// adjust the start accordingly
read.setAlignmentStart(stopPosition + 1);
return read;
}
private static GATKSAMRecord hardClipEndOfRead(GATKSAMRecord oldRec, int startPosition) {
if ( startPosition <= oldRec.getAlignmentStart() ) {
// BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it
//System.out.printf("Entire read needs to be clipped: %50s %n", read.getReadName());
return null;
}
GATKSAMRecord read;
try {
read = (GATKSAMRecord)oldRec.clone();
} catch (Exception e) {
return null;
}
//System.out.printf("Clipping end of read: %50s adaptorStart=%d end=%d isize=%d %n",
// read.getReadName(), startPosition, read.getAlignmentEnd(), read.getInferredInsertSize());
Cigar oldCigar = read.getCigar();
LinkedList<CigarElement> newCigarElements = new LinkedList<CigarElement>();
int currentPos = read.getAlignmentStart();
int basesToKeep = 0;
int basesAlreadyClipped = 0;
for ( CigarElement ce : oldCigar.getCigarElements() ) {
int elementLength = ce.getLength();
if ( currentPos >= startPosition ) {
if ( ce.getOperator() == CigarOperator.H )
basesAlreadyClipped += elementLength;
continue;
}
switch ( ce.getOperator() ) {
case M:
for (int i = 0; i < elementLength; i++, currentPos++, basesToKeep++) {
if ( currentPos == startPosition ) {
newCigarElements.add(new CigarElement(i, CigarOperator.M));
break;
}
}
if ( currentPos != startPosition )
newCigarElements.add(ce);
break;
case I:
case S:
newCigarElements.add(ce);
basesToKeep += elementLength;
break;
case D:
case N:
newCigarElements.add(ce);
currentPos += elementLength;
break;
case H:
case P:
newCigarElements.add(ce);
break;
default: throw new ReviewedStingException("The " + ce.getOperator() + " cigar element is not currently supported");
}
}
// copy over the unclipped bases
final byte[] bases = read.getReadBases();
final byte[] quals = read.getBaseQualities();
byte[] newBases = new byte[basesToKeep];
byte[] newQuals = new byte[basesToKeep];
System.arraycopy(bases, 0, newBases, 0, basesToKeep);
System.arraycopy(quals, 0, newQuals, 0, basesToKeep);
read.setReadBases(newBases);
read.setBaseQualities(newQuals);
// now add a CIGAR element for the clipped bases
newCigarElements.add(new CigarElement((bases.length - basesToKeep) + basesAlreadyClipped, CigarOperator.H));
Cigar newCigar = new Cigar(newCigarElements);
read.setCigar(newCigar);
// adjust the stop accordingly
// read.setAlignmentEnd(startPosition - 1);
return read;
return read.getReadNegativeStrandFlag() ? basePos <= adaptorBoundary : basePos >= adaptorBoundary;
}
/**
* Hard clips away (i.e.g, removes from the read) bases that were previously soft clipped.
* Finds the adaptor boundary around the read and returns the first base inside the adaptor that is closest to
* the read boundary. If the read is in the positive strand, this is the first base after the end of the
* fragment (Picard calls it 'insert'), if the read is in the negative strand, this is the first base before the
* beginning of the fragment.
*
* @param read
* @return
* There are two cases we need to treat here:
*
* 1) Our read is in the reverse strand :
*
* <----------------------| *
* |--------------------->
*
* in these cases, the adaptor boundary is at the mate start (minus one)
*
* 2) Our read is in the forward strand :
*
* |----------------------> *
* <----------------------|
*
* in these cases the adaptor boundary is at the start of the read plus the inferred insert size (plus one)
*
* @param read the read being tested for the adaptor boundary
* @return the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read. NULL if the read is unmapped or the insert size cannot be determined (and is necessary for the calculation).
*/
@Requires("read != null")
@Ensures("result != null")
public static GATKSAMRecord hardClipSoftClippedBases(GATKSAMRecord read) {
List<CigarElement> cigarElts = read.getCigar().getCigarElements();
public static Integer getAdaptorBoundary(final SAMRecord read) {
if ( read.getReadUnmappedFlag() )
return null; // don't worry about unmapped pairs
if ( cigarElts.size() == 1 ) // can't be soft clipped, just return
return read;
final int isize = Math.abs(read.getInferredInsertSize()); // the inferred insert size can be negative if the mate is mapped before the read (so we take the absolute value)
int adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read)
int keepStart = 0, keepEnd = read.getReadLength() - 1;
List<CigarElement> newCigarElements = new LinkedList<CigarElement>();
if ( read.getReadNegativeStrandFlag() )
adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header)
else if (isize > 0)
adaptorBoundary = read.getAlignmentStart() + isize + 1; // case 2 (see header)
else
return null; // this is a case 2 where for some reason we cannot estimate the insert size
for ( int i = 0; i < cigarElts.size(); i++ ) {
CigarElement ce = cigarElts.get(i);
int l = ce.getLength();
switch ( ce.getOperator() ) {
case S:
if ( i == 0 )
keepStart = l;
else
keepEnd = read.getReadLength() - l - 1;
newCigarElements.add(new CigarElement(l, CigarOperator.HARD_CLIP));
break;
default:
newCigarElements.add(ce);
break;
}
}
// Merges tandem cigar elements like 5H10H or 2S5S to 15H or 7S
// this will happen if you soft clip a read that has been hard clipped before
// like: 5H20S => 5H20H
List<CigarElement> mergedCigarElements = new LinkedList<CigarElement>();
Iterator<CigarElement> cigarElementIterator = newCigarElements.iterator();
CigarOperator currentOperator = null;
int currentOperatorLength = 0;
while (cigarElementIterator.hasNext()) {
CigarElement cigarElement = cigarElementIterator.next();
if (currentOperator != cigarElement.getOperator()) {
if (currentOperator != null)
mergedCigarElements.add(new CigarElement(currentOperatorLength, currentOperator));
currentOperator = cigarElement.getOperator();
currentOperatorLength = cigarElement.getLength();
}
else
currentOperatorLength += cigarElement.getLength();
}
mergedCigarElements.add(new CigarElement(currentOperatorLength, currentOperator));
return hardClipBases(read, keepStart, keepEnd, mergedCigarElements);
return adaptorBoundary;
}
/**
* Hard clips out the bases in read, keeping the bases from keepStart to keepEnd, inclusive. Note these
* are offsets, so they are 0 based
* is the read a 454 read ?
*
* @param read
* @param keepStart
* @param keepEnd
* @param newCigarElements
* @return
* @param read the read to test
* @return checks the read group tag PL for the default 454 tag
*/
@Requires({
"read != null",
"keepStart >= 0",
"keepEnd < read.getReadLength()",
"read.getReadUnmappedFlag() || newCigarElements != null"})
@Ensures("result != null")
public static GATKSAMRecord hardClipBases(GATKSAMRecord read, int keepStart, int keepEnd, List<CigarElement> newCigarElements) {
int newLength = keepEnd - keepStart + 1;
if ( newLength != read.getReadLength() ) {
try {
read = (GATKSAMRecord)read.clone();
// copy over the unclipped bases
final byte[] bases = read.getReadBases();
final byte[] quals = read.getBaseQualities();
byte[] newBases = new byte[newLength];
byte[] newQuals = new byte[newLength];
System.arraycopy(bases, keepStart, newBases, 0, newLength);
System.arraycopy(quals, keepStart, newQuals, 0, newLength);
read.setReadBases(newBases);
read.setBaseQualities(newQuals);
// now add a CIGAR element for the clipped bases, if the read isn't unmapped
if ( ! read.getReadUnmappedFlag() ) {
Cigar newCigar = new Cigar(newCigarElements);
read.setCigar(newCigar);
}
} catch ( CloneNotSupportedException e ) {
throw new ReviewedStingException("WTF, where did clone go?", e);
}
}
return read;
}
public static GATKSAMRecord replaceSoftClipsWithMatches(GATKSAMRecord read) {
List<CigarElement> newCigarElements = new ArrayList<CigarElement>();
for ( CigarElement ce : read.getCigar().getCigarElements() ) {
if ( ce.getOperator() == CigarOperator.SOFT_CLIP )
newCigarElements.add(new CigarElement(ce.getLength(), CigarOperator.MATCH_OR_MISMATCH));
else
newCigarElements.add(ce);
}
if ( newCigarElements.size() > 1 ) { //
CigarElement first = newCigarElements.get(0);
CigarElement second = newCigarElements.get(1);
if ( first.getOperator() == CigarOperator.MATCH_OR_MISMATCH && second.getOperator() == CigarOperator.MATCH_OR_MISMATCH ) {
newCigarElements.set(0, new CigarElement(first.getLength() + second.getLength(), CigarOperator.MATCH_OR_MISMATCH));
newCigarElements.remove(1);
}
}
if ( newCigarElements.size() > 1 ) { //
CigarElement penult = newCigarElements.get(newCigarElements.size()-2);
CigarElement last = newCigarElements.get(newCigarElements.size()-1);
if ( penult.getOperator() == CigarOperator.MATCH_OR_MISMATCH && penult.getOperator() == CigarOperator.MATCH_OR_MISMATCH ) {
newCigarElements.set(newCigarElements.size()-2, new CigarElement(penult.getLength() + last.getLength(), CigarOperator.MATCH_OR_MISMATCH));
newCigarElements.remove(newCigarElements.size()-1);
}
}
read.setCigar(new Cigar(newCigarElements));
return read;
}
// Default assumed adaptor length (bases), used by the no-argument overloads below.
// Made final: this is a constant and must not be reassigned.
private static final int DEFAULT_ADAPTOR_SIZE = 100;

/**
 * Hard-clips any adaptor sequence out of the read, using the default adaptor size.
 *
 * @param read original SAM record
 * @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped
 */
public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read) {
    return hardClipAdaptorSequence(read, DEFAULT_ADAPTOR_SIZE);
}
/**
 * Classifies how the base at {@code basePos} relates to the read's mate pair,
 * using the default adaptor size.
 *
 * NOTE(review): some callers are migrating to isBaseInsideAdaptor; presumably
 * this overload is kept for backward compatibility — confirm before removing.
 *
 * @param read the read to test
 * @param basePos reference position of the base to classify
 * @return the overlap type of the base (see OverlapType)
 */
public static OverlapType readPairBaseOverlapType(final SAMRecord read, long basePos) {
    return readPairBaseOverlapType(read, basePos, DEFAULT_ADAPTOR_SIZE);
}
/**
 * is the read a 454 read ?
 *
 * @param read the read to test
 * @return checks the read group tag PL for the default 454 tag
 */
public static boolean is454Read(SAMRecord read) {
    return isPlatformRead(read, "454");
}
/**
 * is the read a SOLiD read ?
 *
 * note: the match is a case-insensitive substring test against the PL tag
 *
 * @param read the read to test
 * @return checks the read group tag PL for the default SOLiD tag
 */
public static boolean isSOLiDRead(SAMRecord read) {
    return isPlatformRead(read, "SOLID");
}
/**
 * is the read a SLX read ?
 *
 * note: SLX (Solexa) reads carry "ILLUMINA" in the read group PL tag,
 * which is why that name is passed to isPlatformRead below
 *
 * @param read the read to test
 * @return checks the read group tag PL for the default SLX tag
 */
public static boolean isSLXRead(SAMRecord read) {
    return isPlatformRead(read, "ILLUMINA");
}
// Human-readable names for the standard SAM flag bits, keyed by bit mask.
// Used by readFlagsAsString; iteration order is unspecified (HashMap).
private static final Map<Integer, String> readFlagNames
        = new HashMap<Integer, String>();

static {
    readFlagNames.put(0x1, "Paired");
    readFlagNames.put(0x2, "Proper");
    readFlagNames.put(0x4, "Unmapped");
    readFlagNames.put(0x8, "MateUnmapped");
    readFlagNames.put(0x10, "Forward");
    //readFlagNames.put(0x20, "MateForward");
    readFlagNames.put(0x40, "FirstOfPair");
    readFlagNames.put(0x80, "SecondOfPair");
    readFlagNames.put(0x100, "NotPrimary");
    readFlagNames.put(0x200, "NON-PF");
    readFlagNames.put(0x400, "Duplicate");
}
public static String readFlagsAsString(GATKSAMRecord read) {
String flags = "";
for (int flag : readFlagNames.keySet()) {
if ((read.getFlags() & flag) != 0) {
flags += readFlagNames.get(flag) + " ";
}
/**
* checks if the read has a platform tag in the readgroup equal to 'name' ?
*
* @param read the read to test
* @param name the platform name to test
* @return whether or not name == PL tag in the read group of read
*/
public static boolean isPlatformRead(SAMRecord read, String name) {
SAMReadGroupRecord readGroup = read.getReadGroup();
if (readGroup != null) {
Object readPlatformAttr = readGroup.getAttribute("PL");
if (readPlatformAttr != null)
return readPlatformAttr.toString().toUpperCase().contains(name);
}
return flags;
return false;
}
/**
* Returns the collections of reads sorted in coordinate order, according to the order defined
* in the reads themselves
@ -618,12 +254,19 @@ public class ReadUtils {
* @param reads
* @return
*/
public final static List<GATKSAMRecord> coordinateSortReads(List<GATKSAMRecord> reads) {
public final static List<GATKSAMRecord> sortReadsByCoordinate(List<GATKSAMRecord> reads) {
final SAMRecordComparator comparer = new SAMRecordCoordinateComparator();
Collections.sort(reads, comparer);
return reads;
}
/**
* If a read starts in INSERTION, returns the first element length.
*
* Warning: If the read has Hard or Soft clips before the insertion this function will return 0.
* @param read
* @return the length of the first insertion, or 0 if there is none (see warning).
*/
public final static int getFirstInsertionOffset(SAMRecord read) {
CigarElement e = read.getCigar().getCigarElement(0);
if ( e.getOperator() == CigarOperator.I )
@ -632,8 +275,15 @@ public class ReadUtils {
return 0;
}
/**
* If a read ends in INSERTION, returns the last element length.
*
* Warning: If the read has Hard or Soft clips after the insertion this function will return 0.
* @param read
* @return the length of the last insertion, or 0 if there is none (see warning).
*/
public final static int getLastInsertionOffset(SAMRecord read) {
CigarElement e = read.getCigar().getCigarElement(read.getCigarLength()-1);
CigarElement e = read.getCigar().getCigarElement(read.getCigarLength() - 1);
if ( e.getOperator() == CigarOperator.I )
return e.getLength();
else
@ -649,8 +299,8 @@ public class ReadUtils {
*/
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(GATKSAMRecord read, GenomeLoc interval) {
int sStart = getRefCoordSoftUnclippedStart(read);
int sStop = getRefCoordSoftUnclippedEnd(read);
int sStart = read.getSoftStart();
int sStop = read.getSoftEnd();
int uStart = read.getUnclippedStart();
int uStop = read.getUnclippedEnd();
@ -684,51 +334,6 @@ public class ReadUtils {
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
}
/**
 * Reference coordinate of the first base of the read counting any soft clips,
 * i.e. the unclipped start advanced past all leading hard clips.
 *
 * @param read the read to inspect
 * @return the soft-unclipped start coordinate
 */
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
public static int getRefCoordSoftUnclippedStart(GATKSAMRecord read) {
    int softStart = read.getUnclippedStart();
    // Leading hard clips occupy reference space before the soft-clipped span:
    // skip over them; stop at the first non-hard-clip element.
    for (CigarElement element : read.getCigar().getCigarElements()) {
        if (element.getOperator() != CigarOperator.HARD_CLIP)
            break;
        softStart += element.getLength();
    }
    return softStart;
}
// Computes the reference coordinate of the last base of the read counting soft
// clips. Walks the cigar from the unclipped start, accumulating the reference
// span of every element that consumes reference bases (plus soft/hard clips);
// the final element's span is applied after the loop unless it is a hard clip.
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
public static int getRefCoordSoftUnclippedEnd(GATKSAMRecord read) {
    int stop = read.getUnclippedStart();
    // An all-insertion read consumes no reference bases at all.
    if (readIsEntirelyInsertion(read))
        return stop;
    int shift = 0;
    CigarOperator lastOperator = null;
    for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
        stop += shift;  // apply the previous element's span before examining this one
        lastOperator = cigarElement.getOperator();
        if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
            shift = cigarElement.getLength();
        else
            shift = 0;
    }
    // -1 converts the exclusive end into the inclusive coordinate of the last base;
    // trailing hard clips are not part of the soft-clipped span, so their shift is dropped.
    return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
// True when every cigar element of the read is an insertion (I).
private static boolean readIsEntirelyInsertion(GATKSAMRecord read) {
    boolean allInsertions = true;
    for (CigarElement element : read.getCigar().getCigarElements()) {
        if (element.getOperator() != CigarOperator.INSERTION) {
            allInsertions = false;
            break;
        }
    }
    return allInsertions;
}
/**
 * Which end of the read a clipping operation applies to.
 */
public enum ClippingTail {
    LEFT_TAIL,   // the start of the read
    RIGHT_TAIL   // the end of the read
}
/**
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) in case it falls in
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
@ -768,14 +373,14 @@ public class ReadUtils {
* @param refCoord
* @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
*/
@Requires({"refCoord >= getRefCoordSoftUnclippedStart(read)", "refCoord <= getRefCoordSoftUnclippedEnd(read)"})
@Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"})
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord) {
int readBases = 0;
int refBases = 0;
boolean fallsInsideDeletion = false;
int goal = refCoord - getRefCoordSoftUnclippedStart(read); // The goal is to move this many reference bases
int goal = refCoord - read.getSoftStart(); // The goal is to move this many reference bases
boolean goalReached = refBases == goal;
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
@ -866,7 +471,30 @@ public class ReadUtils {
return comp.compare(read1, read2);
}
// TEST UTILITIES
/**
 * Is a base inside a read?
 *
 * @param read the read to evaluate
 * @param referenceCoordinate the reference coordinate of the base to test
 * @return true if it is inside the read, false otherwise.
 */
public static boolean isInsideRead(final GATKSAMRecord read, final int referenceCoordinate) {
    final int alignmentStart = read.getAlignmentStart();
    final int alignmentEnd = read.getAlignmentEnd();
    return alignmentStart <= referenceCoordinate && referenceCoordinate <= alignmentEnd;
}
/**
 * Is this read all insertion?
 *
 * @param read the read to test
 * @return whether or not the only element in the cigar string is an Insertion
 */
public static boolean readIsEntirelyInsertion(GATKSAMRecord read) {
    // Any non-insertion cigar element disqualifies the read.
    for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
        if (cigarElement.getOperator() != CigarOperator.INSERTION)
            return false;
    }
    return true;
}

View File

@ -29,9 +29,12 @@ public class PileupWalkerIntegrationTest extends WalkerTest {
String gatk_args = "-T Pileup -I " + validationDataLocation + "OV-0930.normal.chunk.bam "
+ "-R " + hg18Reference
+ " -show_indels -o %s";
String expected_md5="06eedc2e7927650961d99d703f4301a4";
String expected_md5="da2a02d02abac9de14cc4b187d8595a1";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args,1,Arrays.asList(expected_md5));
executeTest("Testing the extended pileup with indel records included on a small chunk of Ovarian dataset with 20 indels (1 D, 19 I)", spec);
// before Adaptor clipping
// String expected_md5="06eedc2e7927650961d99d703f4301a4";
}
}

View File

@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("fbb656369eaa48153d127bd12db59d8f"));
Arrays.asList("ac5f409856a1b79316469733e62abb91"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("2977bb30c8b84a5f4094fe6090658561"));
Arrays.asList("f9aa7bee5a61ac1a9187d0cf1e8af471"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("42dd979a0a931c18dc9be40308bac321"));
Arrays.asList("6f27fd863b6718d59d2a2d8e2a20bcae"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("0948cd1dba7d61f283cc4cf2a7757d92"));
Arrays.asList("40bbd3d5a2397a007c0e74211fb33433"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("477eac07989593b58bb361f3429c085a"));
Arrays.asList("40622d39072b298440a77ecc794116e7"));
executeTest("test exclude annotations", spec);
}
@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("062155edec46a8c52243475fbf3a2943"));
Arrays.asList("31faae1bc588d195ff553cf6c47fabfa"));
executeTest("test overwriting header", spec);
}

View File

@ -32,13 +32,13 @@ import java.util.Arrays;
public class CallableLociWalkerIntegrationTest extends WalkerTest {
final static String commonArgs = "-R " + b36KGReference + " -T CallableLoci -I " + validationDataLocation + "/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s";
final static String SUMMARY_MD5 = "ed4c255bb78313b8e7982127caf3d6c4";
final static String SUMMARY_MD5 = "cd597a8dae35c226a2cb110b1c9f32d5";
@Test
public void testCallableLociWalkerBed() {
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
Arrays.asList("884c9c2d96419d990a708d2bd98fcefa", SUMMARY_MD5));
Arrays.asList("c86ac1ef404c11d5e5452e020c8f7ce9", SUMMARY_MD5));
executeTest("formatBed", spec);
}
@ -46,7 +46,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
public void testCallableLociWalkerPerBase() {
String gatk_args = commonArgs + " -format STATE_PER_BASE -L 1:10,000,000-11,000,000 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
Arrays.asList("fb4524f8b3b213060c0c5b85362b5902", SUMMARY_MD5));
Arrays.asList("d8536a55fe5f6fdb1ee6c9511082fdfd", SUMMARY_MD5));
executeTest("format_state_per_base", spec);
}
@ -62,7 +62,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
public void testCallableLociWalker3() {
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
Arrays.asList("86bd1a5f79356b3656412c4b1c60709a", "6fefb144a60b89c27293ce5ca6e10e6a"));
Arrays.asList("bc966060184bf4605a31da7fe383464e", "d624eda8f6ed14b9251ebeec73e37867"));
executeTest("formatBed lots of arguments", spec);
}
}

View File

@ -61,13 +61,14 @@ public class DepthOfCoverageB36IntegrationTest extends WalkerTest {
File baseOutputFile = this.createTempFile("depthofcoveragemapq0",".tmp");
spec.setOutputFileLocation(baseOutputFile);
spec.addAuxFile("f39af6ad99520fd4fb27b409ab0344a0",baseOutputFile);
spec.addAuxFile("6b15f5330414b6d4e2f6caea42139fa1", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
spec.addAuxFile("cc6640d82077991dde8a2b523935cdff", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
spec.addAuxFile("5b6c16a1c667c844882e9dce71454fc4",baseOutputFile);
spec.addAuxFile("fc161ec1b61dc67bc6a5ce36cb2d02c9", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
spec.addAuxFile("89321bbfb76a4e1edc0905d50503ba1f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
spec.addAuxFile("0fb627234599c258a3fee1b2703e164a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
spec.addAuxFile("cb73a0fa0cee50f1fb8f249315d38128", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
spec.addAuxFile("347b47ef73fbd4e277704ddbd7834f69", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
spec.addAuxFile("4ec920335d4b9573f695c39d62748089", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
spec.addAuxFile("4dd16b659065e331ed4bd3ab0dae6c1b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
spec.addAuxFile("2be0c18b501f4a3d8c5e5f99738b4713", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
spec.addAuxFile("5a26ef61f586f58310812580ce842462", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
execute("testMapQ0Only",spec);
}
@ -83,7 +84,7 @@ public class DepthOfCoverageB36IntegrationTest extends WalkerTest {
File baseOutputFile = this.createTempFile("testManySamples",".tmp");
spec.setOutputFileLocation(baseOutputFile);
spec.addAuxFile("c9561b52344536d2b06ab97b0bb1a234",baseOutputFile);
spec.addAuxFile("d73fa1fc492f7dcc1d75056f8c12c92a",baseOutputFile);
execute("testLotsOfSamples",spec);
}

View File

@ -55,26 +55,26 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
spec.setOutputFileLocation(baseOutputFile);
// now add the expected files that get generated
spec.addAuxFile("423571e4c05e7934322172654ac6dbb7", baseOutputFile);
spec.addAuxFile("9df5e7e07efeb34926c94a724714c219", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
spec.addAuxFile("229b9b5bc2141c86dbc69c8acc9eba6a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
spec.addAuxFile("9cd395f47b329b9dd00ad024fcac9929", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics"));
spec.addAuxFile("471c34ad2e4f7228efd20702d5941ba9", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
spec.addAuxFile("9667c77284c2c08e647b162d0e9652d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
spec.addAuxFile("5a96c75f96d6fa6ee617451d731dae37", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
spec.addAuxFile("b82846df660f0aac8429aec57c2a62d6", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts"));
spec.addAuxFile("d32a8c425fadcc4c048bd8b48d0f61e5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions"));
spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics"));
spec.addAuxFile("2aae346204c5f15517158da8e61a6c16", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary"));
spec.addAuxFile("e70952f241eebb9b5448f2e7cb288131", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
spec.addAuxFile("054ed1e184f46d6a170dc9bf6524270c", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
spec.addAuxFile("d53431022f7387fe9ac47814ab1fcd88", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
spec.addAuxFile("a395dafde101971d2b9e5ddb6cd4b7d0", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
spec.addAuxFile("df0ba76e0e6082c0d29fcfd68efc6b77", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
spec.addAuxFile("e013cb5b11b0321a81c8dbd7c1863787", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
spec.addAuxFile("661160f571def8c323345b5859cfb9da", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
spec.addAuxFile("c95a7a6840334cadd0e520939615c77b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
spec.addAuxFile("19e862f7ed3de97f2569803f766b7433", baseOutputFile);
spec.addAuxFile("c64cc5636d4880b80b71169ed1832cd7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
spec.addAuxFile("1a8ba07a60e55f9fdadc89d00b1f3394", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
spec.addAuxFile("0075cead73a901e3a9d07c5d9c2b75f4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics"));
spec.addAuxFile("d757be2f953f893e66eff1ef1f0fff4e", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
spec.addAuxFile("de08996729c774590d6a4954c906fe84", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
spec.addAuxFile("58ad39b100d1f2af7d119f28ba626bfd", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
spec.addAuxFile("0b4ce6059e6587ae5a986afbbcc7d783", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts"));
spec.addAuxFile("adc2b2babcdd72a843878acf2d510ca7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions"));
spec.addAuxFile("884281c139241c5db3c9f90e8684d084", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics"));
spec.addAuxFile("b90636cad74ff4f6b9ff9a596e145bd6", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary"));
spec.addAuxFile("ad540b355ef90c566bebaeabd70026d2", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
spec.addAuxFile("27fe09a02a5b381e0ed633587c0f4b23", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
spec.addAuxFile("5fcd53b4bd167b5e6d5f92329cf8678e", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
spec.addAuxFile("7a2a19e54f73a8e07de2f020f1f913dd", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
spec.addAuxFile("852a079c5e9e93e7daad31fd6a9f4a49", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
spec.addAuxFile("0828762842103edfaf115ef4e50809c6", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
spec.addAuxFile("5c5aeb28419bba1decb17f8a166777f2", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
spec.addAuxFile("e5fd6216b3d6a751f3a90677b4e5bf3c", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
execute("testBaseOutputNoFiltering",spec);
}

View File

@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("a2d3839c4ebb390b0012d495e4e53b3a"));
Arrays.asList("66ed60c6c1190754abd8a0a9d1d8d61e"));
executeTest("test MultiSample Pilot1", spec);
}
@ -129,8 +129,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "d971d392956aea69c3707da64d24485b" );
e.put( "--output_mode EMIT_ALL_SITES", "21993e71ca1a06a0d1f11d58e3cc26c3" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "42e4ea7878ef8d96215accb3ba4e97b7" );
e.put( "--output_mode EMIT_ALL_SITES", "e0443c720149647469f2a2f3fb73942f" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -189,7 +189,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));
Arrays.asList("2b2729414ae855d390e7940956745bce"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -208,7 +208,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));
Arrays.asList("95c6120efb92e5a325a5cec7d77c2dab"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -227,7 +227,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("a64d2e65b5927260e4ce0d948760cc5c"));
Arrays.asList("d87ce4b405d4f7926d1c36aee7053975"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -255,7 +255,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("69107157632714150fc068d412e31939"));
Arrays.asList("c5989e5d67d9e5fe8c5c956f12a975da"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -265,7 +265,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("4ffda07590e06d58ed867ae326d74b2d"));
Arrays.asList("daca0741278de32e507ad367e67753b6"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
}
@ -275,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("45633d905136c86e9d3f90ce613255e5"));
Arrays.asList("0ccc4e876809566510429c64adece2c7"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}
@ -285,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
Arrays.asList("8bd5028cf294850b8a95b3c0af23d728"));
Arrays.asList("cff6dd0f4eb1ef0b6fc476da6ffead19"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("814dcd66950635a870602d90a1618cce"));
Arrays.asList("1e2a4aab26e9ab0dae709d33a669e036"));
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
}

View File

@ -31,10 +31,11 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@DataProvider(name = "cctestdata")
public Object[][] createCCTestData() {
    // NOTE(review): expected MD5s refreshed in this merge; stale pre-merge values were removed.
    new CCTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "9469d6b65880abe4e5babc1c1a69889d" );
    new CCTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "17d4b8001c982a70185e344929cf3941");
    new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "36c0c467b6245c2c6c4e9c956443a154" );
    new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "ed15f8bf03bb2ea9b7c26844be829c0d" );
    return CCTest.getTests(CCTest.class);
}
@ -88,10 +89,11 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@DataProvider(name = "trtestdata")
public Object[][] createTRTestData() {
    // NOTE(review): expected MD5s refreshed in this merge; stale pre-merge values were removed.
    new TRTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "f020725d9f75ad8f1c14bfae056e250f" );
    new TRTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "d04cf1f6df486e45226ebfbf93a188a5");
    new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b2f4757bc47cf23bd9a09f756c250787" );
    new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "313a21a8a88e3460b6e71ec5ffc50f0f" );
    return TRTest.getTests(TRTest.class);
}
@ -121,7 +123,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesUseOriginalQuals() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "278846c55d97bd9812b758468a83f559");
e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "bd8288b1fc7629e2e8c2cf7f65fefa8f");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -145,7 +147,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorMaxQ70() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "2864f231fab7030377f3c8826796e48f" );
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "f020725d9f75ad8f1c14bfae056e250f" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -174,7 +176,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesSolidIndelsRemoveRefBias() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "8379f24cf5312587a1f92c162ecc220f" );
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1f643bca090478ba68aac88db835a629" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -200,7 +202,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorSolidIndelsRemoveRefBias() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "2ad4c17ac3ed380071137e4e53a398a5" );
e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "613fb2bbe01af8cbe6a188a10c1582ca" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -228,7 +230,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesBED() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "b460478d9683e827784e42bc352db8bb");
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "b00e99219aeafe2516c6232b7d6a0a00");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -252,7 +254,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesVCFPlusDBsnp() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "9131d96f39badbf9753653f55b148012");
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7b92788ce92f49415af3a75a2e4a2b33");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -280,7 +282,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesNoIndex() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "8993d32df5cb66c7149f59eccbd57f4c" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "f34f7141351a5dbf9664c67260f94e96" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -306,7 +308,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorNoIndex() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "5f913c98ca99754902e9d34f99df468f" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "13c83656567cee9e93bda9874ee80234" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();

View File

@@ -32,7 +32,6 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@@ -90,13 +89,13 @@ public class ReadClipperUnitTest extends BaseTest {
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
for (int i=alnStart; i<=alnEnd; i++) {
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
if (!clipLeft.isEmpty())
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
}
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
@@ -111,7 +110,7 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
for (int i=alnStart; i<=alnEnd; i++) {
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
if (!clipLeft.isEmpty())
@@ -127,7 +126,7 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
for (int i=alnStart; i<=alnEnd; i++) {
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.

View File

@@ -1,4 +1,4 @@
package org.broadinstitute.sting.utils;
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
@@ -33,20 +33,6 @@ public class ReadUtilsUnitTest extends BaseTest {
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
}
// Helper for the clipping tests below: hard-clips the given read via
// ReadUtils.hardClipBases(read, expectedStart, expectedStop - 1, null) and then
// asserts that the resulting read's bases and quality string equal
// BASES.substring(expectedStart, expectedStop) / QUALS.substring(expectedStart, expectedStop).
// NOTE(review): the exact inclusive/exclusive contract of hardClipBases is not
// visible here — the -1 on the stop coordinate suggests an inclusive stop; confirm
// against ReadUtils before relying on this helper elsewhere.
private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {
SAMRecord clipped = ReadUtils.hardClipBases(read, expectedStart, expectedStop - 1, null);
String expectedBases = BASES.substring(expectedStart, expectedStop);
String expectedQuals = QUALS.substring(expectedStart, expectedStop);
Assert.assertEquals(clipped.getReadBases(), expectedBases.getBytes(), "Clipped bases not those expected");
Assert.assertEquals(clipped.getBaseQualityString(), expectedQuals, "Clipped quals not those expected");
}
// Clip nothing, one/two bases off the front, and one/two bases off the back of
// the shared 4-base test read.
@Test public void testNoClip() { testReadBasesAndQuals(read, 0, 4); }
@Test public void testClip1Front() { testReadBasesAndQuals(read, 1, 4); }
@Test public void testClip2Front() { testReadBasesAndQuals(read, 2, 4); }
@Test public void testClip1Back() { testReadBasesAndQuals(read, 0, 3); }
@Test public void testClip2Back() { testReadBasesAndQuals(read, 0, 2); }
@Test
public void testReducedReads() {
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
@@ -69,4 +55,49 @@ public class ReadUtilsUnitTest extends BaseTest {
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
}
/**
 * Exercises ReadUtils.getAdaptorBoundary() on an artificial 8M read paired with a
 * mate at a fixed position, covering both strands with the read starting either
 * before or after its mate.
 *
 * Expected contract (as asserted below):
 *   positive strand -> alignmentStart + insert size + 1
 *   negative strand -> one base before the mate's alignment start
 */
@Test
public void testGetAdaptorBoundary() {
    final byte[] readBases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};
    final byte[] baseQuals = {30, 30, 30, 30, 30, 30, 30, 30};
    final int insertSize = 10;
    final int matePos = 1000;

    GATKSAMRecord rec = ArtificialSAMUtils.createArtificialRead(readBases, baseQuals, "8M");
    rec.setMateAlignmentStart(matePos);
    rec.setInferredInsertSize(insertSize);

    // Positive strand: boundary follows the read's own start, whether the read
    // begins before (first in pair) or after (second in pair) its mate.
    for (final int start : new int[]{matePos - 2, matePos + 2}) {
        rec.setAlignmentStart(start);
        rec.setReadNegativeStrandFlag(false);
        Assert.assertEquals(ReadUtils.getAdaptorBoundary(rec), start + insertSize + 1);
    }

    // Negative strand: boundary is anchored to the mate's start, regardless of
    // where this read begins relative to the mate.
    for (final int start : new int[]{matePos + 2, matePos - 2}) {
        rec.setAlignmentStart(start);
        rec.setReadNegativeStrandFlag(true);
        Assert.assertEquals(ReadUtils.getAdaptorBoundary(rec), matePos - 1);
    }
}
}