Added contracts, docs, and tests for several methods in AlignmentUtils. There are over 74K tests being run now for this class!
* AlignmentUtils.getMismatchCount() * AlignmentUtils.calcAlignmentByteArrayOffset() * AlignmentUtils.readToAlignmentByteArray(). * AlignmentUtils.leftAlignIndel()
This commit is contained in:
parent
cc7731d61f
commit
9826192854
|
|
@ -180,6 +180,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
||||||
|
|
||||||
for (final PileupElement p : pileup) {
|
for (final PileupElement p : pileup) {
|
||||||
final Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize, locus);
|
final Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize, locus);
|
||||||
|
if ( haplotypeFromRead != null )
|
||||||
candidateHaplotypeQueue.add(haplotypeFromRead);
|
candidateHaplotypeQueue.add(haplotypeFromRead);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -230,8 +231,18 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a haplotype object constructed from the read or null if read's cigar is null
|
||||||
|
*
|
||||||
|
* @param p pileup element representing the read
|
||||||
|
* @param contextSize the context size to use
|
||||||
|
* @param locus the position
|
||||||
|
* @return possibly null Haplotype object constructed from the read
|
||||||
|
*/
|
||||||
private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) {
|
private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) {
|
||||||
final GATKSAMRecord read = p.getRead();
|
final GATKSAMRecord read = p.getRead();
|
||||||
|
if ( read.getCigar() == null )
|
||||||
|
return null;
|
||||||
|
|
||||||
final byte[] haplotypeBases = new byte[contextSize];
|
final byte[] haplotypeBases = new byte[contextSize];
|
||||||
Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD);
|
Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD);
|
||||||
|
|
@ -347,6 +358,10 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
||||||
double expected = 0.0;
|
double expected = 0.0;
|
||||||
double mismatches = 0.0;
|
double mismatches = 0.0;
|
||||||
|
|
||||||
|
final GATKSAMRecord read = p.getRead();
|
||||||
|
if ( read.getCigar() == null )
|
||||||
|
return 0.0;
|
||||||
|
|
||||||
// What's the expected mismatch rate under the model that this read is actually sampled from
|
// What's the expected mismatch rate under the model that this read is actually sampled from
|
||||||
// this haplotype? Let's assume the consensus base c is a random choice one of A, C, G, or T, and that
|
// this haplotype? Let's assume the consensus base c is a random choice one of A, C, G, or T, and that
|
||||||
// the observed base is actually from a c with an error rate e. Since e is the rate at which we'd
|
// the observed base is actually from a c with an error rate e. Since e is the rate at which we'd
|
||||||
|
|
@ -358,14 +373,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
||||||
// the chance that it is actually a mismatch is 1 - e, since any of the other 3 options would be a mismatch.
|
// the chance that it is actually a mismatch is 1 - e, since any of the other 3 options would be a mismatch.
|
||||||
// so the probability-weighted mismatch rate is sum_i ( matched ? e_i / 3 : 1 - e_i ) for i = 1 ... n
|
// so the probability-weighted mismatch rate is sum_i ( matched ? e_i / 3 : 1 - e_i ) for i = 1 ... n
|
||||||
final byte[] haplotypeBases = haplotype.getBases();
|
final byte[] haplotypeBases = haplotype.getBases();
|
||||||
final GATKSAMRecord read = p.getRead();
|
|
||||||
byte[] readBases = read.getReadBases();
|
byte[] readBases = read.getReadBases();
|
||||||
|
|
||||||
readBases = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readBases); // Adjust the read bases based on the Cigar string
|
readBases = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readBases); // Adjust the read bases based on the Cigar string
|
||||||
byte[] readQuals = read.getBaseQualities();
|
byte[] readQuals = read.getBaseQualities();
|
||||||
readQuals = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string
|
readQuals = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string
|
||||||
int readOffsetFromPileup = p.getOffset();
|
int readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, read.getAlignmentStart(), locus);
|
||||||
readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, read.getAlignmentStart(), locus);
|
|
||||||
final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;
|
final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;
|
||||||
|
|
||||||
for (int i = 0; i < contextSize; i++) {
|
for (int i = 0; i < contextSize; i++) {
|
||||||
|
|
|
||||||
|
|
@ -87,7 +87,7 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
|
||||||
if (alleleLikelihoodMap == null) {
|
if (alleleLikelihoodMap == null) {
|
||||||
// use old UG SNP-based version if we don't have per-read allele likelihoods
|
// use old UG SNP-based version if we don't have per-read allele likelihoods
|
||||||
for ( final PileupElement p : pileup ) {
|
for ( final PileupElement p : pileup ) {
|
||||||
if ( isUsableBase(p) ) {
|
if ( isUsableBase(p) && p.getRead().getCigar() != null ) {
|
||||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
|
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
|
||||||
|
|
||||||
readPos = getFinalReadPosition(p.getRead(),readPos);
|
readPos = getFinalReadPosition(p.getRead(),readPos);
|
||||||
|
|
@ -105,7 +105,7 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
|
||||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||||
final GATKSAMRecord read = el.getKey();
|
final GATKSAMRecord read = el.getKey();
|
||||||
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true );
|
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true );
|
||||||
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED )
|
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED || read.getCigar() == null )
|
||||||
continue;
|
continue;
|
||||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 );
|
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 );
|
||||||
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
|
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
|
||||||
|
|
|
||||||
|
|
@ -404,7 +404,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
|
|
||||||
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
||||||
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
|
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
|
||||||
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );
|
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0, true) );
|
||||||
|
|
||||||
if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 ) { // protect against SW failures
|
if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 ) { // protect against SW failures
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -445,7 +445,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( ref, h.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( ref, h.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
||||||
|
|
||||||
h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() );
|
h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() );
|
||||||
h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0) );
|
h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0, true) );
|
||||||
if ( haplotype.isArtificialHaplotype() ) {
|
if ( haplotype.isArtificialHaplotype() ) {
|
||||||
h.setArtificialEvent(haplotype.getArtificialEvent());
|
h.setArtificialEvent(haplotype.getArtificialEvent());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -767,7 +767,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
final double improvement = (bestConsensus == null ? -1 : ((double)(totalRawMismatchSum - bestConsensus.mismatchSum))/10.0);
|
final double improvement = (bestConsensus == null ? -1 : ((double)(totalRawMismatchSum - bestConsensus.mismatchSum))/10.0);
|
||||||
if ( improvement >= LOD_THRESHOLD ) {
|
if ( improvement >= LOD_THRESHOLD ) {
|
||||||
|
|
||||||
bestConsensus.cigar = AlignmentUtils.leftAlignIndel(bestConsensus.cigar, reference, bestConsensus.str, bestConsensus.positionOnReference, bestConsensus.positionOnReference);
|
bestConsensus.cigar = AlignmentUtils.leftAlignIndel(bestConsensus.cigar, reference, bestConsensus.str, bestConsensus.positionOnReference, bestConsensus.positionOnReference, true);
|
||||||
|
|
||||||
// start cleaning the appropriate reads
|
// start cleaning the appropriate reads
|
||||||
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes ) {
|
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes ) {
|
||||||
|
|
@ -926,7 +926,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
||||||
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
|
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
|
||||||
if ( numBlocks == 2 ) {
|
if ( numBlocks == 2 ) {
|
||||||
Cigar newCigar = AlignmentUtils.leftAlignIndel(unclipCigar(read.getCigar()), reference, read.getReadBases(), read.getAlignmentStart()-leftmostIndex, 0);
|
Cigar newCigar = AlignmentUtils.leftAlignIndel(unclipCigar(read.getCigar()), reference, read.getReadBases(), read.getAlignmentStart()-leftmostIndex, 0, true);
|
||||||
aRead.setCigar(newCigar, false);
|
aRead.setCigar(newCigar, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -110,7 +110,7 @@ public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
|
||||||
// move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
// move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
||||||
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
|
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
|
||||||
if ( numBlocks == 2 ) {
|
if ( numBlocks == 2 ) {
|
||||||
Cigar newCigar = AlignmentUtils.leftAlignIndel(IndelRealigner.unclipCigar(read.getCigar()), ref.getBases(), read.getReadBases(), 0, 0);
|
Cigar newCigar = AlignmentUtils.leftAlignIndel(IndelRealigner.unclipCigar(read.getCigar()), ref.getBases(), read.getReadBases(), 0, 0, true);
|
||||||
newCigar = IndelRealigner.reclipCigar(newCigar, read);
|
newCigar = IndelRealigner.reclipCigar(newCigar, read);
|
||||||
read.setCigar(newCigar);
|
read.setCigar(newCigar);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -153,7 +153,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void HCTestStructuralIndels() {
|
public void HCTestStructuralIndels() {
|
||||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("87fe31a4bbd68a9eb5d5910db5011c82"));
|
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("bd8c30b99d0ac7c4108e3d88c272a996"));
|
||||||
executeTest("HCTestStructuralIndels: ", spec);
|
executeTest("HCTestStructuralIndels: ", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -163,7 +163,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
||||||
Cigar originalCigar = new Cigar(elements);
|
Cigar originalCigar = new Cigar(elements);
|
||||||
|
|
||||||
// left align the CIGAR
|
// left align the CIGAR
|
||||||
Cigar newCigar = AlignmentUtils.leftAlignIndel(originalCigar, refSeq, originalIndel, 0, 0);
|
Cigar newCigar = AlignmentUtils.leftAlignIndel(originalCigar, refSeq, originalIndel, 0, 0, true);
|
||||||
|
|
||||||
// update if necessary and write
|
// update if necessary and write
|
||||||
if ( !newCigar.equals(originalCigar) && newCigar.numCigarElements() > 1 ) {
|
if ( !newCigar.equals(originalCigar) && newCigar.numCigarElements() > 1 ) {
|
||||||
|
|
|
||||||
|
|
@ -31,11 +31,9 @@ import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -66,7 +64,27 @@ public final class AlignmentUtils {
|
||||||
|
|
||||||
// todo -- this code and mismatchesInRefWindow should be combined and optimized into a single
|
// todo -- this code and mismatchesInRefWindow should be combined and optimized into a single
|
||||||
// todo -- high performance implementation. We can do a lot better than this right now
|
// todo -- high performance implementation. We can do a lot better than this right now
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Count how many bases mismatch the reference. Indels are not considered mismatching.
|
||||||
|
*
|
||||||
|
* @param r the sam record to check against
|
||||||
|
* @param refSeq the byte array representing the reference sequence
|
||||||
|
* @param refIndex the index in the reference byte array of the read's first base
|
||||||
|
* @param startOnRead the index in the read's bases from which we start counting
|
||||||
|
* @param nReadBases the number of bases after (but including) startOnRead that we check
|
||||||
|
* @return non-null object representing the mismatch count
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
public static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex, int startOnRead, int nReadBases) {
|
public static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex, int startOnRead, int nReadBases) {
|
||||||
|
if ( r == null ) throw new IllegalArgumentException("attempting to calculate the mismatch count from a read that is null");
|
||||||
|
if ( refSeq == null ) throw new IllegalArgumentException("attempting to calculate the mismatch count with a reference sequence that is null");
|
||||||
|
if ( refIndex < 0 ) throw new IllegalArgumentException("attempting to calculate the mismatch count with a reference index that is negative");
|
||||||
|
if ( startOnRead < 0 ) throw new IllegalArgumentException("attempting to calculate the mismatch count with a read start that is negative");
|
||||||
|
if ( nReadBases < 0 ) throw new IllegalArgumentException("attempting to calculate the mismatch count for a negative number of read bases");
|
||||||
|
if ( refSeq.length - refIndex < r.getReadLength() )
|
||||||
|
throw new IllegalArgumentException("attempting to calculate the mismatch count against a reference string that is smaller than the read");
|
||||||
|
|
||||||
MismatchCount mc = new MismatchCount();
|
MismatchCount mc = new MismatchCount();
|
||||||
|
|
||||||
int readIdx = 0;
|
int readIdx = 0;
|
||||||
|
|
@ -241,7 +259,24 @@ public final class AlignmentUtils {
|
||||||
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isDeletion(), alignmentStart, refLocus );
|
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isDeletion(), alignmentStart, refLocus );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate the index into the read's bases of the beginning of the encompassing cigar element for a given cigar and offset
|
||||||
|
*
|
||||||
|
* @param cigar the read's CIGAR -- cannot be null
|
||||||
|
* @param offset the offset to use for the calculation or -1 if in the middle of a deletion
|
||||||
|
* @param isDeletion are we in the middle of a deletion?
|
||||||
|
* @param alignmentStart the alignment start of the read
|
||||||
|
* @param refLocus the reference position of the offset
|
||||||
|
* @return a non-negative int index
|
||||||
|
*/
|
||||||
|
@Ensures("result >= 0")
|
||||||
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isDeletion, final int alignmentStart, final int refLocus) {
|
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isDeletion, final int alignmentStart, final int refLocus) {
|
||||||
|
if ( cigar == null ) throw new IllegalArgumentException("attempting to find the alignment position from a CIGAR that is null");
|
||||||
|
if ( offset < -1 ) throw new IllegalArgumentException("attempting to find the alignment position with an offset that is negative (and not -1)");
|
||||||
|
if ( alignmentStart < 0 ) throw new IllegalArgumentException("attempting to find the alignment position from an alignment start that is negative");
|
||||||
|
if ( refLocus < 0 ) throw new IllegalArgumentException("attempting to find the alignment position from a reference position that is negative");
|
||||||
|
if ( offset >= cigar.getReadLength() ) throw new IllegalArgumentException("attempting to find the alignment position of an offset than is larger than the read length");
|
||||||
|
|
||||||
int pileupOffset = offset;
|
int pileupOffset = offset;
|
||||||
|
|
||||||
// Reassign the offset if we are in the middle of a deletion because of the modified representation of the read bases
|
// Reassign the offset if we are in the middle of a deletion because of the modified representation of the read bases
|
||||||
|
|
@ -302,32 +337,19 @@ public final class AlignmentUtils {
|
||||||
return alignmentPos;
|
return alignmentPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate an array of bases for just those that are aligned to the reference (i.e. no clips or insertions)
|
||||||
|
*
|
||||||
|
* @param cigar the read's CIGAR -- cannot be null
|
||||||
|
* @param read the read's base array
|
||||||
|
* @return a non-null array of bases (bytes)
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
public static byte[] readToAlignmentByteArray(final Cigar cigar, final byte[] read) {
|
public static byte[] readToAlignmentByteArray(final Cigar cigar, final byte[] read) {
|
||||||
|
if ( cigar == null ) throw new IllegalArgumentException("attempting to generate an alignment from a CIGAR that is null");
|
||||||
|
if ( read == null ) throw new IllegalArgumentException("attempting to generate an alignment from a read sequence that is null");
|
||||||
|
|
||||||
int alignmentLength = 0;
|
final int alignmentLength = cigar.getReferenceLength();
|
||||||
for (int iii = 0; iii < cigar.numCigarElements(); iii++) {
|
|
||||||
|
|
||||||
final CigarElement ce = cigar.getCigarElement(iii);
|
|
||||||
final int elementLength = ce.getLength();
|
|
||||||
|
|
||||||
switch (ce.getOperator()) {
|
|
||||||
case D:
|
|
||||||
case N:
|
|
||||||
case M:
|
|
||||||
case EQ:
|
|
||||||
case X:
|
|
||||||
alignmentLength += elementLength;
|
|
||||||
break;
|
|
||||||
case I:
|
|
||||||
case S:
|
|
||||||
case H:
|
|
||||||
case P:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final byte[] alignment = new byte[alignmentLength];
|
final byte[] alignment = new byte[alignmentLength];
|
||||||
int alignPos = 0;
|
int alignPos = 0;
|
||||||
int readPos = 0;
|
int readPos = 0;
|
||||||
|
|
@ -339,35 +361,31 @@ public final class AlignmentUtils {
|
||||||
switch (ce.getOperator()) {
|
switch (ce.getOperator()) {
|
||||||
case I:
|
case I:
|
||||||
if (alignPos > 0) {
|
if (alignPos > 0) {
|
||||||
if (alignment[alignPos - 1] == BaseUtils.Base.A.base) {
|
final int prevPos = alignPos - 1;
|
||||||
alignment[alignPos - 1] = PileupElement.A_FOLLOWED_BY_INSERTION_BASE;
|
if (alignment[prevPos] == BaseUtils.Base.A.base) {
|
||||||
} else if (alignment[alignPos - 1] == BaseUtils.Base.C.base) {
|
alignment[prevPos] = PileupElement.A_FOLLOWED_BY_INSERTION_BASE;
|
||||||
alignment[alignPos - 1] = PileupElement.C_FOLLOWED_BY_INSERTION_BASE;
|
} else if (alignment[prevPos] == BaseUtils.Base.C.base) {
|
||||||
} else if (alignment[alignPos - 1] == BaseUtils.Base.T.base) {
|
alignment[prevPos] = PileupElement.C_FOLLOWED_BY_INSERTION_BASE;
|
||||||
alignment[alignPos - 1] = PileupElement.T_FOLLOWED_BY_INSERTION_BASE;
|
} else if (alignment[prevPos] == BaseUtils.Base.T.base) {
|
||||||
} else if (alignment[alignPos - 1] == BaseUtils.Base.G.base) {
|
alignment[prevPos] = PileupElement.T_FOLLOWED_BY_INSERTION_BASE;
|
||||||
alignment[alignPos - 1] = PileupElement.G_FOLLOWED_BY_INSERTION_BASE;
|
} else if (alignment[prevPos] == BaseUtils.Base.G.base) {
|
||||||
|
alignment[prevPos] = PileupElement.G_FOLLOWED_BY_INSERTION_BASE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case S:
|
case S:
|
||||||
for (int jjj = 0; jjj < elementLength; jjj++) {
|
readPos += elementLength;
|
||||||
readPos++;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case D:
|
case D:
|
||||||
case N:
|
case N:
|
||||||
for (int jjj = 0; jjj < elementLength; jjj++) {
|
for (int jjj = 0; jjj < elementLength; jjj++) {
|
||||||
alignment[alignPos] = PileupElement.DELETION_BASE;
|
alignment[alignPos++] = PileupElement.DELETION_BASE;
|
||||||
alignPos++;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
case EQ:
|
case EQ:
|
||||||
case X:
|
case X:
|
||||||
for (int jjj = 0; jjj < elementLength; jjj++) {
|
for (int jjj = 0; jjj < elementLength; jjj++) {
|
||||||
alignment[alignPos] = read[readPos];
|
alignment[alignPos++] = read[readPos++];
|
||||||
alignPos++;
|
|
||||||
readPos++;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case H:
|
case H:
|
||||||
|
|
@ -450,34 +468,98 @@ public final class AlignmentUtils {
|
||||||
* should be the position where the alignment of that part of the read starts at. In other words, both refIndex and readIndex are
|
* should be the position where the alignment of that part of the read starts at. In other words, both refIndex and readIndex are
|
||||||
* always the positions where the cigar starts on the ref and on the read, respectively.
|
* always the positions where the cigar starts on the ref and on the read, respectively.
|
||||||
* <p/>
|
* <p/>
|
||||||
* If the alignment has an indel, then this method attempts moving this indel left across a stretch of repetitive bases. For instance, if the original cigar
|
* If the alignment has one or more indels, this method attempts to move them left across a stretch of repetitive bases.
|
||||||
* specifies that (any) one AT is deleted from a repeat sequence TATATATA, the output cigar will always mark the leftmost AT
|
* For instance, if the original cigar specifies that (any) one AT is deleted from a repeat sequence TATATATA, the output
|
||||||
* as deleted. If there is no indel in the original cigar, or the indel position is determined unambiguously (i.e. inserted/deleted sequence
|
* cigar will always mark the leftmost AT as deleted. If there is no indel in the original cigar or if the indel position
|
||||||
* is not repeated), the original cigar is returned.
|
* is determined unambiguously (i.e. inserted/deleted sequence is not repeated), the original cigar is returned.
|
||||||
|
*
|
||||||
|
* Note that currently we do not actually support the case where there is more than one indel in the alignment. We will throw
|
||||||
|
* an exception if there is -- unless the
|
||||||
*
|
*
|
||||||
* @param cigar structure of the original alignment
|
* @param cigar structure of the original alignment
|
||||||
* @param refSeq reference sequence the read is aligned to
|
* @param refSeq reference sequence the read is aligned to
|
||||||
* @param readSeq read sequence
|
* @param readSeq read sequence
|
||||||
* @param refIndex 0-based alignment start position on ref
|
* @param refIndex 0-based alignment start position on ref
|
||||||
* @param readIndex 0-based alignment start position on read
|
* @param readIndex 0-based alignment start position on read
|
||||||
* @return a cigar, in which indel is guaranteed to be placed at the leftmost possible position across a repeat (if any)
|
* @param doNotThrowExceptionForMultipleIndels if true we will not throw an exception if we encounter multiple indels in the alignment will instead will return the original cigar
|
||||||
|
* @return a non-null cigar, in which the indels are guaranteed to be placed at the leftmost possible position across a repeat (if any)
|
||||||
*/
|
*/
|
||||||
public static Cigar leftAlignIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) {
|
@Ensures("result != null")
|
||||||
|
public static Cigar leftAlignIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex, final boolean doNotThrowExceptionForMultipleIndels) {
|
||||||
|
ensureLeftAlignmentHasGoodArguments(cigar, refSeq, readSeq, refIndex, readIndex);
|
||||||
|
|
||||||
|
final int numIndels = countIndelElements(cigar);
|
||||||
|
if ( numIndels == 0 )
|
||||||
|
return cigar;
|
||||||
|
if ( numIndels == 1 )
|
||||||
|
return leftAlignSingleIndel(cigar, refSeq, readSeq, refIndex, readIndex);
|
||||||
|
|
||||||
|
// if we got here then there is more than 1 indel in the alignment
|
||||||
|
if ( doNotThrowExceptionForMultipleIndels )
|
||||||
|
return cigar;
|
||||||
|
|
||||||
|
throw new UnsupportedOperationException("attempting to left align a CIGAR that has more than 1 indel in its alignment but this functionality has not been implemented yet");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void ensureLeftAlignmentHasGoodArguments(final Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) {
|
||||||
|
if ( cigar == null ) throw new IllegalArgumentException("attempting to left align a CIGAR that is null");
|
||||||
|
if ( refSeq == null ) throw new IllegalArgumentException("attempting to left align a reference sequence that is null");
|
||||||
|
if ( readSeq == null ) throw new IllegalArgumentException("attempting to left align a read sequence that is null");
|
||||||
|
if ( refIndex < 0 ) throw new IllegalArgumentException("attempting to left align with a reference index less than 0");
|
||||||
|
if ( readIndex < 0 ) throw new IllegalArgumentException("attempting to left align with a read index less than 0");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Counts the number of I/D operators
|
||||||
|
*
|
||||||
|
* @param cigar cigar to check -- cannot be null
|
||||||
|
* @return non-negative count of indel operators
|
||||||
|
*/
|
||||||
|
@Requires("cigar != null")
|
||||||
|
@Ensures("result >= 0")
|
||||||
|
private static int countIndelElements(final Cigar cigar) {
|
||||||
|
int indelCount = 0;
|
||||||
|
for ( CigarElement ce : cigar.getCigarElements() ) {
|
||||||
|
if ( ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I )
|
||||||
|
indelCount++;
|
||||||
|
}
|
||||||
|
return indelCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* See the documentation for AlignmentUtils.leftAlignIndel() for more details.
|
||||||
|
*
|
||||||
|
* This flavor of the left alignment works if and only if the alignment has one - and only one - indel.
|
||||||
|
* An exception is thrown if there are no indels or more than 1 indel in the alignment.
|
||||||
|
*
|
||||||
|
* @param cigar structure of the original alignment -- cannot be null
|
||||||
|
* @param refSeq reference sequence the read is aligned to
|
||||||
|
* @param readSeq read sequence
|
||||||
|
* @param refIndex 0-based alignment start position on ref
|
||||||
|
* @param readIndex 0-based alignment start position on read
|
||||||
|
* @return a non-null cigar, in which the single indel is guaranteed to be placed at the leftmost possible position across a repeat (if any)
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
|
public static Cigar leftAlignSingleIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) {
|
||||||
|
ensureLeftAlignmentHasGoodArguments(cigar, refSeq, readSeq, refIndex, readIndex);
|
||||||
|
|
||||||
int indexOfIndel = -1;
|
int indexOfIndel = -1;
|
||||||
for (int i = 0; i < cigar.numCigarElements(); i++) {
|
for (int i = 0; i < cigar.numCigarElements(); i++) {
|
||||||
CigarElement ce = cigar.getCigarElement(i);
|
CigarElement ce = cigar.getCigarElement(i);
|
||||||
if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) {
|
if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) {
|
||||||
// if there is more than 1 indel, don't left align
|
// if there is more than 1 indel, exception out
|
||||||
if (indexOfIndel != -1)
|
if (indexOfIndel != -1)
|
||||||
return cigar;
|
throw new IllegalArgumentException("attempting to left align a CIGAR that has more than 1 indel in its alignment");
|
||||||
indexOfIndel = i;
|
indexOfIndel = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if there is no indel or if the alignment starts with an insertion (so that there
|
// if there is no indel, exception out
|
||||||
// is no place on the read to move that insertion further left), we are done
|
if ( indexOfIndel == -1 )
|
||||||
if (indexOfIndel < 1) return cigar;
|
throw new IllegalArgumentException("attempting to left align a CIGAR that has no indels in its alignment");
|
||||||
|
// if the alignment starts with an insertion (so that there is no place on the read to move that insertion further left), we are done
|
||||||
|
if ( indexOfIndel == 0 )
|
||||||
|
return cigar;
|
||||||
|
|
||||||
final int indelLength = cigar.getCigarElement(indexOfIndel).getLength();
|
final int indelLength = cigar.getCigarElement(indexOfIndel).getLength();
|
||||||
|
|
||||||
|
|
@ -545,6 +627,15 @@ public final class AlignmentUtils {
|
||||||
return new Cigar(elements);
|
return new Cigar(elements);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Move the indel in a given cigar string one base to the left
|
||||||
|
*
|
||||||
|
* @param cigar original cigar
|
||||||
|
* @param indexOfIndel the index of the indel cigar element
|
||||||
|
* @return non-null cigar with indel moved one base to the left
|
||||||
|
*/
|
||||||
|
@Requires("cigar != null && indexOfIndel >= 0 && indexOfIndel < cigar.numCigarElements()")
|
||||||
|
@Ensures("result != null")
|
||||||
private static Cigar moveCigarLeft(Cigar cigar, int indexOfIndel) {
|
private static Cigar moveCigarLeft(Cigar cigar, int indexOfIndel) {
|
||||||
// get the first few elements
|
// get the first few elements
|
||||||
ArrayList<CigarElement> elements = new ArrayList<CigarElement>(cigar.numCigarElements());
|
ArrayList<CigarElement> elements = new ArrayList<CigarElement>(cigar.numCigarElements());
|
||||||
|
|
@ -568,6 +659,19 @@ public final class AlignmentUtils {
|
||||||
return new Cigar(elements);
|
return new Cigar(elements);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create the string (really a byte array) representation of an indel-containing cigar against the reference.
|
||||||
|
*
|
||||||
|
* @param cigar the indel-containing cigar
|
||||||
|
* @param indexOfIndel the index of the indel cigar element
|
||||||
|
* @param refSeq the reference sequence
|
||||||
|
* @param readSeq the read sequence for the cigar
|
||||||
|
* @param refIndex the starting reference index into refSeq
|
||||||
|
* @param readIndex the starting read index into readSeq
|
||||||
|
* @return non-null byte array which is the indel representation against the reference
|
||||||
|
*/
|
||||||
|
@Requires("cigar != null && indexOfIndel >= 0 && indexOfIndel < cigar.numCigarElements() && refSeq != null && readSeq != null && refIndex >= 0 && readIndex >= 0")
|
||||||
|
@Ensures("result != null")
|
||||||
private static byte[] createIndelString(final Cigar cigar, final int indexOfIndel, final byte[] refSeq, final byte[] readSeq, int refIndex, int readIndex) {
|
private static byte[] createIndelString(final Cigar cigar, final int indexOfIndel, final byte[] refSeq, final byte[] readSeq, int refIndex, int readIndex) {
|
||||||
CigarElement indel = cigar.getCigarElement(indexOfIndel);
|
CigarElement indel = cigar.getCigarElement(indexOfIndel);
|
||||||
int indelLength = indel.getLength();
|
int indelLength = indel.getLength();
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
|
|
@ -325,4 +326,318 @@ public class AlignmentUtilsUnitTest {
|
||||||
final int actual = AlignmentUtils.calcNumHighQualitySoftClips(read, (byte) qualThreshold);
|
final int actual = AlignmentUtils.calcNumHighQualitySoftClips(read, (byte) qualThreshold);
|
||||||
Assert.assertEquals(actual, numExpected, "Wrong number of soft clips detected for read " + read.getSAMString());
|
Assert.assertEquals(actual, numExpected, "Wrong number of soft clips detected for read " + read.getSAMString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////
|
||||||
|
// Test AlignmentUtils.getMismatchCount() //
|
||||||
|
////////////////////////////////////////////
|
||||||
|
|
||||||
|
@DataProvider(name = "MismatchCountDataProvider")
|
||||||
|
public Object[][] makeMismatchCountDataProvider() {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
final int readLength = 20;
|
||||||
|
final int lengthOfIndel = 2;
|
||||||
|
final int locationOnReference = 10;
|
||||||
|
final byte[] reference = Utils.dupBytes((byte)'A', readLength);
|
||||||
|
final byte[] quals = Utils.dupBytes((byte)'A', readLength);
|
||||||
|
|
||||||
|
|
||||||
|
for ( int startOnRead = 0; startOnRead <= readLength; startOnRead++ ) {
|
||||||
|
for ( int basesToRead = 0; basesToRead <= readLength; basesToRead++ ) {
|
||||||
|
for ( final int lengthOfSoftClip : Arrays.asList(0, 1, 10) ) {
|
||||||
|
for ( final int lengthOfFirstM : Arrays.asList(0, 3) ) {
|
||||||
|
for ( final char middleOp : Arrays.asList('M', 'D', 'I') ) {
|
||||||
|
for ( final int mismatchLocation : Arrays.asList(-1, 0, 5, 10, 15, 19) ) {
|
||||||
|
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, locationOnReference, readLength);
|
||||||
|
|
||||||
|
// set the read's bases and quals
|
||||||
|
final byte[] readBases = reference.clone();
|
||||||
|
// create the mismatch if requested
|
||||||
|
if ( mismatchLocation != -1 )
|
||||||
|
readBases[mismatchLocation] = (byte)'C';
|
||||||
|
read.setReadBases(readBases);
|
||||||
|
read.setBaseQualities(quals);
|
||||||
|
|
||||||
|
// create the CIGAR string
|
||||||
|
read.setCigarString(buildTestCigarString(middleOp, lengthOfSoftClip, lengthOfFirstM, lengthOfIndel, readLength));
|
||||||
|
|
||||||
|
// now, determine whether or not there's a mismatch
|
||||||
|
final boolean isMismatch;
|
||||||
|
if ( mismatchLocation < startOnRead || mismatchLocation >= startOnRead + basesToRead || mismatchLocation < lengthOfSoftClip ) {
|
||||||
|
isMismatch = false;
|
||||||
|
} else if ( middleOp == 'M' || middleOp == 'D' || mismatchLocation < lengthOfSoftClip + lengthOfFirstM || mismatchLocation >= lengthOfSoftClip + lengthOfFirstM + lengthOfIndel ) {
|
||||||
|
isMismatch = true;
|
||||||
|
} else {
|
||||||
|
isMismatch = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
tests.add(new Object[]{read, locationOnReference, startOnRead, basesToRead, isMismatch});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "MismatchCountDataProvider")
|
||||||
|
public void testMismatchCountData(final GATKSAMRecord read, final int refIndex, final int startOnRead, final int basesToRead, final boolean isMismatch) {
|
||||||
|
final byte[] reference = Utils.dupBytes((byte)'A', 100);
|
||||||
|
final int actual = AlignmentUtils.getMismatchCount(read, reference, refIndex, startOnRead, basesToRead).numMismatches;
|
||||||
|
Assert.assertEquals(actual, isMismatch ? 1 : 0, "Wrong number of mismatches detected for read " + read.getSAMString());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String buildTestCigarString(final char middleOp, final int lengthOfSoftClip, final int lengthOfFirstM, final int lengthOfIndel, final int readLength) {
|
||||||
|
final StringBuilder cigar = new StringBuilder();
|
||||||
|
int remainingLength = readLength;
|
||||||
|
if ( lengthOfSoftClip > 0 ) {
|
||||||
|
cigar.append(lengthOfSoftClip + "S");
|
||||||
|
remainingLength -= lengthOfSoftClip;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( middleOp == 'M' ) {
|
||||||
|
cigar.append(remainingLength + "M");
|
||||||
|
} else {
|
||||||
|
if ( lengthOfFirstM > 0 ) {
|
||||||
|
cigar.append(lengthOfFirstM + "M");
|
||||||
|
remainingLength -= lengthOfFirstM;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( middleOp == 'D' ) {
|
||||||
|
cigar.append(lengthOfIndel + "D");
|
||||||
|
} else {
|
||||||
|
cigar.append(lengthOfIndel + "I");
|
||||||
|
remainingLength -= lengthOfIndel;
|
||||||
|
}
|
||||||
|
cigar.append(remainingLength + "M");
|
||||||
|
}
|
||||||
|
|
||||||
|
return cigar.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// Test AlignmentUtils.calcAlignmentByteArrayOffset() //
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
@DataProvider(name = "AlignmentByteArrayOffsetDataProvider")
|
||||||
|
public Object[][] makeAlignmentByteArrayOffsetDataProvider() {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
final int readLength = 20;
|
||||||
|
final int lengthOfIndel = 2;
|
||||||
|
final int locationOnReference = 20;
|
||||||
|
|
||||||
|
for ( int offset = 0; offset < readLength; offset++ ) {
|
||||||
|
for ( final int lengthOfSoftClip : Arrays.asList(0, 1, 10) ) {
|
||||||
|
for ( final int lengthOfFirstM : Arrays.asList(0, 3) ) {
|
||||||
|
for ( final char middleOp : Arrays.asList('M', 'D', 'I') ) {
|
||||||
|
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, locationOnReference, readLength);
|
||||||
|
// create the CIGAR string
|
||||||
|
read.setCigarString(buildTestCigarString(middleOp, lengthOfSoftClip, lengthOfFirstM, lengthOfIndel, readLength));
|
||||||
|
|
||||||
|
// now, determine the expected alignment offset
|
||||||
|
final int expected;
|
||||||
|
boolean isDeletion = false;
|
||||||
|
if ( offset < lengthOfSoftClip ) {
|
||||||
|
expected = 0;
|
||||||
|
} else if ( middleOp == 'M' || offset < lengthOfSoftClip + lengthOfFirstM ) {
|
||||||
|
expected = offset - lengthOfSoftClip;
|
||||||
|
} else if ( offset < lengthOfSoftClip + lengthOfFirstM + lengthOfIndel ) {
|
||||||
|
if ( middleOp == 'D' ) {
|
||||||
|
isDeletion = true;
|
||||||
|
expected = offset - lengthOfSoftClip;
|
||||||
|
} else {
|
||||||
|
expected = lengthOfFirstM;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
expected = offset - lengthOfSoftClip - (middleOp == 'I' ? lengthOfIndel : -lengthOfIndel);
|
||||||
|
}
|
||||||
|
|
||||||
|
tests.add(new Object[]{read.getCigar(), offset, expected, isDeletion, lengthOfSoftClip});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "AlignmentByteArrayOffsetDataProvider")
|
||||||
|
public void testAlignmentByteArrayOffsetData(final Cigar cigar, final int offset, final int expectedResult, final boolean isDeletion, final int lengthOfSoftClip) {
|
||||||
|
final int actual = AlignmentUtils.calcAlignmentByteArrayOffset(cigar, isDeletion ? -1 : offset, isDeletion, 20, 20 + offset - lengthOfSoftClip);
|
||||||
|
Assert.assertEquals(actual, expectedResult, "Wrong alignment offset detected for cigar " + cigar.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////
|
||||||
|
// Test AlignmentUtils.readToAlignmentByteArray() //
|
||||||
|
////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
@DataProvider(name = "ReadToAlignmentByteArrayDataProvider")
|
||||||
|
public Object[][] makeReadToAlignmentByteArrayDataProvider() {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
final int readLength = 20;
|
||||||
|
final int lengthOfIndel = 2;
|
||||||
|
final int locationOnReference = 20;
|
||||||
|
|
||||||
|
for ( final int lengthOfSoftClip : Arrays.asList(0, 1, 10) ) {
|
||||||
|
for ( final int lengthOfFirstM : Arrays.asList(0, 3) ) {
|
||||||
|
for ( final char middleOp : Arrays.asList('M', 'D', 'I') ) {
|
||||||
|
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, locationOnReference, readLength);
|
||||||
|
// create the CIGAR string
|
||||||
|
read.setCigarString(buildTestCigarString(middleOp, lengthOfSoftClip, lengthOfFirstM, lengthOfIndel, readLength));
|
||||||
|
|
||||||
|
// now, determine the byte array size
|
||||||
|
final int expected = readLength - lengthOfSoftClip - (middleOp == 'I' ? lengthOfIndel : (middleOp == 'D' ? -lengthOfIndel : 0));
|
||||||
|
final int indelBasesStart = middleOp != 'M' ? lengthOfFirstM : -1;
|
||||||
|
|
||||||
|
tests.add(new Object[]{read.getCigar(), expected, middleOp, indelBasesStart, lengthOfIndel});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "ReadToAlignmentByteArrayDataProvider")
|
||||||
|
public void testReadToAlignmentByteArrayData(final Cigar cigar, final int expectedLength, final char middleOp, final int startOfIndelBases, final int lengthOfDeletion) {
|
||||||
|
final byte[] read = Utils.dupBytes((byte)'A', cigar.getReadLength());
|
||||||
|
final byte[] alignment = AlignmentUtils.readToAlignmentByteArray(cigar, read);
|
||||||
|
|
||||||
|
Assert.assertEquals(alignment.length, expectedLength, "Wrong alignment length detected for cigar " + cigar.toString());
|
||||||
|
|
||||||
|
for ( int i = 0; i < alignment.length; i++ ) {
|
||||||
|
final byte expectedBase;
|
||||||
|
if ( middleOp == 'D' && i >= startOfIndelBases && i < startOfIndelBases + lengthOfDeletion )
|
||||||
|
expectedBase = PileupElement.DELETION_BASE;
|
||||||
|
else if ( middleOp == 'I' && i == startOfIndelBases - 1 )
|
||||||
|
expectedBase = PileupElement.A_FOLLOWED_BY_INSERTION_BASE;
|
||||||
|
else
|
||||||
|
expectedBase = (byte)'A';
|
||||||
|
Assert.assertEquals(alignment[i], expectedBase, "Wrong base detected at position " + i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Test AlignmentUtils.leftAlignIndel() //
|
||||||
|
//////////////////////////////////////////
|
||||||
|
|
||||||
|
@DataProvider(name = "LeftAlignIndelDataProvider")
|
||||||
|
public Object[][] makeLeftAlignIndelDataProvider() {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
final byte[] repeat1Reference = "ABCDEFGHIJKLMNOPXXXXXXXXXXABCDEFGHIJKLMNOP".getBytes();
|
||||||
|
final byte[] repeat2Reference = "ABCDEFGHIJKLMNOPXYXYXYXYXYABCDEFGHIJKLMNOP".getBytes();
|
||||||
|
final byte[] repeat3Reference = "ABCDEFGHIJKLMNOPXYZXYZXYZXYZABCDEFGHIJKLMN".getBytes();
|
||||||
|
final int referenceLength = repeat1Reference.length;
|
||||||
|
|
||||||
|
for ( int indelStart = 0; indelStart < repeat1Reference.length; indelStart++ ) {
|
||||||
|
for ( final int indelSize : Arrays.asList(0, 1, 2, 3, 4) ) {
|
||||||
|
for ( final char indelOp : Arrays.asList('D', 'I') ) {
|
||||||
|
|
||||||
|
if ( indelOp == 'D' && indelStart + indelSize >= repeat1Reference.length )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
final int readLength = referenceLength - (indelOp == 'D' ? indelSize : -indelSize);
|
||||||
|
|
||||||
|
// create the original CIGAR string
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, readLength);
|
||||||
|
read.setCigarString(buildTestCigarString(indelSize == 0 ? 'M' : indelOp, 0, indelStart, indelSize, readLength));
|
||||||
|
final Cigar originalCigar = read.getCigar();
|
||||||
|
|
||||||
|
final Cigar expectedCigar1 = makeExpectedCigar1(originalCigar, indelOp, indelStart, indelSize, readLength);
|
||||||
|
final byte[] readString1 = makeReadString(repeat1Reference, indelOp, indelStart, indelSize, readLength, 1);
|
||||||
|
tests.add(new Object[]{originalCigar, expectedCigar1, repeat1Reference, readString1, 1});
|
||||||
|
|
||||||
|
final Cigar expectedCigar2 = makeExpectedCigar2(originalCigar, indelOp, indelStart, indelSize, readLength);
|
||||||
|
final byte[] readString2 = makeReadString(repeat2Reference, indelOp, indelStart, indelSize, readLength, 2);
|
||||||
|
tests.add(new Object[]{originalCigar, expectedCigar2, repeat2Reference, readString2, 2});
|
||||||
|
|
||||||
|
final Cigar expectedCigar3 = makeExpectedCigar3(originalCigar, indelOp, indelStart, indelSize, readLength);
|
||||||
|
final byte[] readString3 = makeReadString(repeat3Reference, indelOp, indelStart, indelSize, readLength, 3);
|
||||||
|
tests.add(new Object[]{originalCigar, expectedCigar3, repeat3Reference, readString3, 3});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
private Cigar makeExpectedCigar1(final Cigar originalCigar, final char indelOp, final int indelStart, final int indelSize, final int readLength) {
|
||||||
|
if ( indelSize == 0 || indelStart < 17 || indelStart > (26 - (indelOp == 'D' ? indelSize : 0)) )
|
||||||
|
return originalCigar;
|
||||||
|
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, readLength);
|
||||||
|
read.setCigarString(buildTestCigarString(indelOp, 0, 16, indelSize, readLength));
|
||||||
|
return read.getCigar();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Cigar makeExpectedCigar2(final Cigar originalCigar, final char indelOp, final int indelStart, final int indelSize, final int readLength) {
|
||||||
|
if ( indelStart < 17 || indelStart > (26 - (indelOp == 'D' ? indelSize : 0)) )
|
||||||
|
return originalCigar;
|
||||||
|
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, readLength);
|
||||||
|
|
||||||
|
if ( indelOp == 'I' && (indelSize == 1 || indelSize == 3) && indelStart % 2 == 1 )
|
||||||
|
read.setCigarString(buildTestCigarString(indelOp, 0, Math.max(indelStart - indelSize, 16), indelSize, readLength));
|
||||||
|
else if ( (indelSize == 2 || indelSize == 4) && (indelOp == 'D' || indelStart % 2 == 0) )
|
||||||
|
read.setCigarString(buildTestCigarString(indelOp, 0, 16, indelSize, readLength));
|
||||||
|
else
|
||||||
|
return originalCigar;
|
||||||
|
|
||||||
|
return read.getCigar();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Cigar makeExpectedCigar3(final Cigar originalCigar, final char indelOp, final int indelStart, final int indelSize, final int readLength) {
|
||||||
|
if ( indelStart < 17 || indelStart > (28 - (indelOp == 'D' ? indelSize : 0)) )
|
||||||
|
return originalCigar;
|
||||||
|
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, readLength);
|
||||||
|
|
||||||
|
if ( indelSize == 3 && (indelOp == 'D' || indelStart % 3 == 1) )
|
||||||
|
read.setCigarString(buildTestCigarString(indelOp, 0, 16, indelSize, readLength));
|
||||||
|
else if ( (indelOp == 'I' && indelSize == 4 && indelStart % 3 == 2) ||
|
||||||
|
(indelOp == 'I' && indelSize == 2 && indelStart % 3 == 0) ||
|
||||||
|
(indelOp == 'I' && indelSize == 1 && indelStart < 28 && indelStart % 3 == 2) )
|
||||||
|
read.setCigarString(buildTestCigarString(indelOp, 0, Math.max(indelStart - indelSize, 16), indelSize, readLength));
|
||||||
|
else
|
||||||
|
return originalCigar;
|
||||||
|
|
||||||
|
return read.getCigar();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static byte[] makeReadString(final byte[] reference, final char indelOp, final int indelStart, final int indelSize, final int readLength, final int repeatLength) {
|
||||||
|
final byte[] readString = new byte[readLength];
|
||||||
|
|
||||||
|
if ( indelOp == 'D' && indelSize > 0 ) {
|
||||||
|
System.arraycopy(reference, 0, readString, 0, indelStart);
|
||||||
|
System.arraycopy(reference, indelStart + indelSize, readString, indelStart, readLength - indelStart);
|
||||||
|
} else if ( indelOp == 'I' && indelSize > 0 ) {
|
||||||
|
System.arraycopy(reference, 0, readString, 0, indelStart);
|
||||||
|
for ( int i = 0; i < indelSize; i++ ) {
|
||||||
|
if ( i % repeatLength == 0 )
|
||||||
|
readString[indelStart + i] = 'X';
|
||||||
|
else if ( i % repeatLength == 1 )
|
||||||
|
readString[indelStart + i] = 'Y';
|
||||||
|
else
|
||||||
|
readString[indelStart + i] = 'Z';
|
||||||
|
}
|
||||||
|
System.arraycopy(reference, indelStart, readString, indelStart + indelSize, readLength - indelStart - indelSize);
|
||||||
|
} else {
|
||||||
|
System.arraycopy(reference, 0, readString, 0, readLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
return readString;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "LeftAlignIndelDataProvider", enabled = true)
|
||||||
|
public void testLeftAlignIndelData(final Cigar originalCigar, final Cigar expectedCigar, final byte[] reference, final byte[] read, final int repeatLength) {
|
||||||
|
final Cigar actualCigar = AlignmentUtils.leftAlignIndel(originalCigar, reference, read, 0, 0, true);
|
||||||
|
Assert.assertTrue(expectedCigar.equals(actualCigar), "Wrong left alignment detected for cigar " + originalCigar.toString() + " to " + actualCigar.toString() + " but expected " + expectedCigar.toString() + " with repeat length " + repeatLength);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue