Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
6913710e89
|
|
@ -165,6 +165,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
@Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp and eval tracks with exactly matching reference and alternate alleles will be counted as overlapping", required=false)
|
@Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp and eval tracks with exactly matching reference and alternate alleles will be counted as overlapping", required=false)
|
||||||
private boolean requireStrictAlleleMatch = false;
|
private boolean requireStrictAlleleMatch = false;
|
||||||
|
|
||||||
|
@Argument(fullName="keepAC0", shortName="keepAC0", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false)
|
||||||
|
private boolean keepSitesWithAC0 = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If true, VariantEval will treat -eval 1 -eval 2 as separate tracks from the same underlying
|
* If true, VariantEval will treat -eval 1 -eval 2 as separate tracks from the same underlying
|
||||||
* variant set, and evaluate the union of the results. Useful when you want to do -eval chr1.vcf -eval chr2.vcf etc.
|
* variant set, and evaluate the union of the results. Useful when you want to do -eval chr1.vcf -eval chr2.vcf etc.
|
||||||
|
|
@ -580,4 +583,8 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
public GenomeAnalysisEngine getToolkit() {
|
public GenomeAnalysisEngine getToolkit() {
|
||||||
return super.getToolkit();
|
return super.getToolkit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean ignoreAC0Sites() {
|
||||||
|
return ! keepSitesWithAC0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -93,7 +93,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
||||||
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
|
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
|
||||||
// added in a proxy check for monomorphic status here.
|
// added in a proxy check for monomorphic status here.
|
||||||
// Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call.
|
// Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call.
|
||||||
if ( vc1.isMonomorphicInSamples() ) {
|
if ( getWalker().ignoreAC0Sites() && vc1.isMonomorphicInSamples() ) {
|
||||||
nRefLoci++;
|
nRefLoci++;
|
||||||
} else {
|
} else {
|
||||||
switch (vc1.getType()) {
|
switch (vc1.getType()) {
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,7 @@ public class IndelLengthHistogram extends VariantEvaluator implements StandardEv
|
||||||
public TreeMap<Object, Object> results;
|
public TreeMap<Object, Object> results;
|
||||||
|
|
||||||
public final static int MAX_SIZE_FOR_HISTOGRAM = 10;
|
public final static int MAX_SIZE_FOR_HISTOGRAM = 10;
|
||||||
|
private final static boolean INCLUDE_LONG_EVENTS_AT_MAX_SIZE = false;
|
||||||
|
|
||||||
public IndelLengthHistogram() {
|
public IndelLengthHistogram() {
|
||||||
initializeCounts(MAX_SIZE_FOR_HISTOGRAM);
|
initializeCounts(MAX_SIZE_FOR_HISTOGRAM);
|
||||||
|
|
@ -85,6 +86,8 @@ public class IndelLengthHistogram extends VariantEvaluator implements StandardEv
|
||||||
@Override
|
@Override
|
||||||
public void update1(final VariantContext eval, final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
public void update1(final VariantContext eval, final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||||
if ( eval.isIndel() && ! eval.isComplexIndel() ) {
|
if ( eval.isIndel() && ! eval.isComplexIndel() ) {
|
||||||
|
if ( ! ( getWalker().ignoreAC0Sites() && eval.isMonomorphicInSamples() )) {
|
||||||
|
// only if we are actually polymorphic in the subsetted samples should we count the allele
|
||||||
for ( Allele alt : eval.getAlternateAlleles() ) {
|
for ( Allele alt : eval.getAlternateAlleles() ) {
|
||||||
final int alleleSize = alt.length() - eval.getReference().length();
|
final int alleleSize = alt.length() - eval.getReference().length();
|
||||||
if ( alleleSize == 0 ) throw new ReviewedStingException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference());
|
if ( alleleSize == 0 ) throw new ReviewedStingException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference());
|
||||||
|
|
@ -92,19 +95,26 @@ public class IndelLengthHistogram extends VariantEvaluator implements StandardEv
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update the histogram with the implied length of the indel allele between ref and alt (alt.len - ref.len).
|
* Update the histogram with the implied length of the indel allele between ref and alt (alt.len - ref.len).
|
||||||
*
|
*
|
||||||
* If this size is outside of MAX_SIZE_FOR_HISTOGRAM, the size is capped to MAX_SIZE_FOR_HISTOGRAM
|
* If this size is outside of MAX_SIZE_FOR_HISTOGRAM, the size is capped to MAX_SIZE_FOR_HISTOGRAM,
|
||||||
|
* if INCLUDE_LONG_EVENTS_AT_MAX_SIZE is set.
|
||||||
*
|
*
|
||||||
* @param ref
|
* @param ref
|
||||||
* @param alt
|
* @param alt
|
||||||
*/
|
*/
|
||||||
public void updateLengthHistogram(final Allele ref, final Allele alt) {
|
public void updateLengthHistogram(final Allele ref, final Allele alt) {
|
||||||
int len = alt.length() - ref.length();
|
int len = alt.length() - ref.length();
|
||||||
|
if ( INCLUDE_LONG_EVENTS_AT_MAX_SIZE ) {
|
||||||
if ( len > MAX_SIZE_FOR_HISTOGRAM ) len = MAX_SIZE_FOR_HISTOGRAM;
|
if ( len > MAX_SIZE_FOR_HISTOGRAM ) len = MAX_SIZE_FOR_HISTOGRAM;
|
||||||
if ( len < -MAX_SIZE_FOR_HISTOGRAM ) len = -MAX_SIZE_FOR_HISTOGRAM;
|
if ( len < -MAX_SIZE_FOR_HISTOGRAM ) len = -MAX_SIZE_FOR_HISTOGRAM;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( Math.abs(len) > MAX_SIZE_FOR_HISTOGRAM )
|
||||||
|
return;
|
||||||
|
|
||||||
nIndels++;
|
nIndels++;
|
||||||
counts.put(len, counts.get(len) + 1);
|
counts.put(len, counts.get(len) + 1);
|
||||||
|
|
|
||||||
|
|
@ -134,7 +134,7 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
||||||
@Override public int getComparisonOrder() { return 2; }
|
@Override public int getComparisonOrder() { return 2; }
|
||||||
|
|
||||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if ( eval == null || eval.isMonomorphicInSamples() )
|
if ( eval == null || (getWalker().ignoreAC0Sites() && eval.isMonomorphicInSamples()) )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// update counts
|
// update counts
|
||||||
|
|
|
||||||
|
|
@ -92,7 +92,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
||||||
@Override public int getComparisonOrder() { return 2; }
|
@Override public int getComparisonOrder() { return 2; }
|
||||||
|
|
||||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if ( eval == null || eval.isMonomorphicInSamples() )
|
if ( eval == null || (getWalker().ignoreAC0Sites() && eval.isMonomorphicInSamples()) )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// update counts
|
// update counts
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphicInSamples()) {
|
if (vc == null || !vc.isSNP() || (getWalker().ignoreAC0Sites() && vc.isMonomorphicInSamples())) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -207,7 +207,8 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if ( eval == null || eval.isMonomorphicInSamples() ) return;
|
if ( eval == null || (getWalker().ignoreAC0Sites() && eval.isMonomorphicInSamples()) )
|
||||||
|
return;
|
||||||
|
|
||||||
final Type type = getType(eval);
|
final Type type = getType(eval);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -41,6 +42,8 @@ public class Haplotype {
|
||||||
private GenomeLoc genomeLocation = null;
|
private GenomeLoc genomeLocation = null;
|
||||||
private HashMap<String, double[]> readLikelihoodsPerSample = null;
|
private HashMap<String, double[]> readLikelihoodsPerSample = null;
|
||||||
private boolean isRef = false;
|
private boolean isRef = false;
|
||||||
|
private Cigar cigar;
|
||||||
|
private int alignmentStartHapwrtRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
||||||
|
|
@ -112,11 +115,7 @@ public class Haplotype {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
String returnString = "";
|
return new String(bases);
|
||||||
for(int iii = 0; iii < bases.length; iii++) {
|
|
||||||
returnString += (char) bases[iii];
|
|
||||||
}
|
|
||||||
return returnString;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public double[] getQuals() {
|
public double[] getQuals() {
|
||||||
|
|
@ -134,11 +133,27 @@ public class Haplotype {
|
||||||
return genomeLocation.getStop();
|
return genomeLocation.getStop();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"refInsertLocation >= 0", "hapStartInRefCoords >= 0"})
|
public int getAlignmentStartHapwrtRef() {
|
||||||
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final int hapStartInRefCoords, final Cigar haplotypeCigar ) {
|
return alignmentStartHapwrtRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAlignmentStartHapwrtRef( final int alignmentStartHapwrtRef ) {
|
||||||
|
this.alignmentStartHapwrtRef = alignmentStartHapwrtRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Cigar getCigar() {
|
||||||
|
return cigar;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCigar( final Cigar cigar ) {
|
||||||
|
this.cigar = cigar;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires({"refInsertLocation >= 0"})
|
||||||
|
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) {
|
||||||
|
|
||||||
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
|
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
|
||||||
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(hapStartInRefCoords, haplotypeCigar, refInsertLocation);
|
int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
|
||||||
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||||
return bases.clone();
|
return bases.clone();
|
||||||
}
|
}
|
||||||
|
|
@ -233,85 +248,4 @@ public class Haplotype {
|
||||||
|
|
||||||
return haplotypeMap;
|
return haplotypeMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
// BUGBUG: copied from ReadClipper and slightly modified since we don't have the data in a GATKSAMRecord
|
|
||||||
private static Integer getHaplotypeCoordinateForReferenceCoordinate( final int haplotypeStart, final Cigar haplotypeCigar, final int refCoord ) {
|
|
||||||
int readBases = 0;
|
|
||||||
int refBases = 0;
|
|
||||||
boolean fallsInsideDeletion = false;
|
|
||||||
|
|
||||||
int goal = refCoord - haplotypeStart; // The goal is to move this many reference bases
|
|
||||||
boolean goalReached = refBases == goal;
|
|
||||||
|
|
||||||
Iterator<CigarElement> cigarElementIterator = haplotypeCigar.getCigarElements().iterator();
|
|
||||||
while (!goalReached && cigarElementIterator.hasNext()) {
|
|
||||||
CigarElement cigarElement = cigarElementIterator.next();
|
|
||||||
int shift = 0;
|
|
||||||
|
|
||||||
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
|
|
||||||
if (refBases + cigarElement.getLength() < goal)
|
|
||||||
shift = cigarElement.getLength();
|
|
||||||
else
|
|
||||||
shift = goal - refBases;
|
|
||||||
|
|
||||||
refBases += shift;
|
|
||||||
}
|
|
||||||
goalReached = refBases == goal;
|
|
||||||
|
|
||||||
if (!goalReached && cigarElement.getOperator().consumesReadBases())
|
|
||||||
readBases += cigarElement.getLength();
|
|
||||||
|
|
||||||
if (goalReached) {
|
|
||||||
// Is this base's reference position within this cigar element? Or did we use it all?
|
|
||||||
boolean endsWithinCigar = shift < cigarElement.getLength();
|
|
||||||
|
|
||||||
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
|
|
||||||
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.
|
|
||||||
if (!endsWithinCigar && !cigarElementIterator.hasNext())
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
CigarElement nextCigarElement;
|
|
||||||
|
|
||||||
// if we end inside the current cigar element, we just have to check if it is a deletion
|
|
||||||
if (endsWithinCigar)
|
|
||||||
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
|
|
||||||
|
|
||||||
// if we end outside the current cigar element, we need to check if the next element is an insertion or deletion.
|
|
||||||
else {
|
|
||||||
nextCigarElement = cigarElementIterator.next();
|
|
||||||
|
|
||||||
// if it's an insertion, we need to clip the whole insertion before looking at the next element
|
|
||||||
if (nextCigarElement.getOperator() == CigarOperator.INSERTION) {
|
|
||||||
readBases += nextCigarElement.getLength();
|
|
||||||
if (!cigarElementIterator.hasNext())
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
nextCigarElement = cigarElementIterator.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
// if it's a deletion, we will pass the information on to be handled downstream.
|
|
||||||
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we reached our goal outside a deletion, add the shift
|
|
||||||
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
|
|
||||||
readBases += shift;
|
|
||||||
|
|
||||||
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
|
|
||||||
// to add the shift of the current cigar element but go back to it's last element to return the last
|
|
||||||
// base before the deletion (see warning in function contracts)
|
|
||||||
else if (fallsInsideDeletion && !endsWithinCigar)
|
|
||||||
readBases += shift - 1;
|
|
||||||
|
|
||||||
// If we reached our goal inside a deletion then we must backtrack to the last base before the deletion
|
|
||||||
else if (fallsInsideDeletion && endsWithinCigar)
|
|
||||||
readBases--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!goalReached)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
return (fallsInsideDeletion ? -1 : readBases);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,7 @@ public class ReadUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int DEFAULT_ADAPTOR_SIZE = 100;
|
private static int DEFAULT_ADAPTOR_SIZE = 100;
|
||||||
|
public static int CLIPPING_GOAL_NOT_REACHED = -1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A marker to tell which end of the read has been clipped
|
* A marker to tell which end of the read has been clipped
|
||||||
|
|
@ -362,7 +363,11 @@ public class ReadUtils {
|
||||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"})
|
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"})
|
||||||
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||||
public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) {
|
public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) {
|
||||||
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
|
return getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), refCoord, tail, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int getReadCoordinateForReferenceCoordinate(final int alignmentStart, final Cigar cigar, final int refCoord, final ClippingTail tail, final boolean allowGoalNotReached) {
|
||||||
|
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(alignmentStart, cigar, refCoord, allowGoalNotReached);
|
||||||
int readCoord = result.getFirst();
|
int readCoord = result.getFirst();
|
||||||
|
|
||||||
// Corner case one: clipping the right tail and falls on deletion, move to the next
|
// Corner case one: clipping the right tail and falls on deletion, move to the next
|
||||||
|
|
@ -374,9 +379,9 @@ public class ReadUtils {
|
||||||
// clipping the left tail and first base is insertion, go to the next read coordinate
|
// clipping the left tail and first base is insertion, go to the next read coordinate
|
||||||
// with the same reference coordinate. Advance to the next cigar element, or to the
|
// with the same reference coordinate. Advance to the next cigar element, or to the
|
||||||
// end of the read if there is no next element.
|
// end of the read if there is no next element.
|
||||||
Pair<Boolean, CigarElement> firstElementIsInsertion = readStartsWithInsertion(read);
|
Pair<Boolean, CigarElement> firstElementIsInsertion = readStartsWithInsertion(cigar);
|
||||||
if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst())
|
if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst())
|
||||||
readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), read.getReadLength() - 1);
|
readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), cigar.getReadLength() - 1);
|
||||||
|
|
||||||
return readCoord;
|
return readCoord;
|
||||||
}
|
}
|
||||||
|
|
@ -400,14 +405,25 @@ public class ReadUtils {
|
||||||
@Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"})
|
@Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"})
|
||||||
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
|
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
|
||||||
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord) {
|
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord) {
|
||||||
|
return getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), refCoord, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(final int alignmentStart, final Cigar cigar, final int refCoord, final boolean allowGoalNotReached) {
|
||||||
int readBases = 0;
|
int readBases = 0;
|
||||||
int refBases = 0;
|
int refBases = 0;
|
||||||
boolean fallsInsideDeletion = false;
|
boolean fallsInsideDeletion = false;
|
||||||
|
|
||||||
int goal = refCoord - read.getSoftStart(); // The goal is to move this many reference bases
|
int goal = refCoord - alignmentStart; // The goal is to move this many reference bases
|
||||||
|
if (goal < 0) {
|
||||||
|
if (allowGoalNotReached) {
|
||||||
|
return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
|
||||||
|
} else {
|
||||||
|
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||||
|
}
|
||||||
|
}
|
||||||
boolean goalReached = refBases == goal;
|
boolean goalReached = refBases == goal;
|
||||||
|
|
||||||
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
|
Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
|
||||||
while (!goalReached && cigarElementIterator.hasNext()) {
|
while (!goalReached && cigarElementIterator.hasNext()) {
|
||||||
CigarElement cigarElement = cigarElementIterator.next();
|
CigarElement cigarElement = cigarElementIterator.next();
|
||||||
int shift = 0;
|
int shift = 0;
|
||||||
|
|
@ -431,8 +447,13 @@ public class ReadUtils {
|
||||||
|
|
||||||
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
|
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
|
||||||
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.
|
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.
|
||||||
if (!endsWithinCigar && !cigarElementIterator.hasNext())
|
if (!endsWithinCigar && !cigarElementIterator.hasNext()) {
|
||||||
|
if (allowGoalNotReached) {
|
||||||
|
return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
|
||||||
|
} else {
|
||||||
throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
|
throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
CigarElement nextCigarElement;
|
CigarElement nextCigarElement;
|
||||||
|
|
||||||
|
|
@ -447,8 +468,13 @@ public class ReadUtils {
|
||||||
// if it's an insertion, we need to clip the whole insertion before looking at the next element
|
// if it's an insertion, we need to clip the whole insertion before looking at the next element
|
||||||
if (nextCigarElement.getOperator() == CigarOperator.INSERTION) {
|
if (nextCigarElement.getOperator() == CigarOperator.INSERTION) {
|
||||||
readBases += nextCigarElement.getLength();
|
readBases += nextCigarElement.getLength();
|
||||||
if (!cigarElementIterator.hasNext())
|
if (!cigarElementIterator.hasNext()) {
|
||||||
|
if (allowGoalNotReached) {
|
||||||
|
return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
|
||||||
|
} else {
|
||||||
throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
|
throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
nextCigarElement = cigarElementIterator.next();
|
nextCigarElement = cigarElementIterator.next();
|
||||||
}
|
}
|
||||||
|
|
@ -473,8 +499,13 @@ public class ReadUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!goalReached)
|
if (!goalReached) {
|
||||||
|
if (allowGoalNotReached) {
|
||||||
|
return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
|
||||||
|
} else {
|
||||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
||||||
}
|
}
|
||||||
|
|
@ -527,7 +558,11 @@ public class ReadUtils {
|
||||||
* @return A pair with the answer (true/false) and the element or null if it doesn't exist
|
* @return A pair with the answer (true/false) and the element or null if it doesn't exist
|
||||||
*/
|
*/
|
||||||
public static Pair<Boolean, CigarElement> readStartsWithInsertion(GATKSAMRecord read) {
|
public static Pair<Boolean, CigarElement> readStartsWithInsertion(GATKSAMRecord read) {
|
||||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
return readStartsWithInsertion(read.getCigar());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Pair<Boolean, CigarElement> readStartsWithInsertion(final Cigar cigar) {
|
||||||
|
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
||||||
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||||
return new Pair<Boolean, CigarElement>(true, cigarElement);
|
return new Pair<Boolean, CigarElement>(true, cigarElement);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -488,7 +488,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
"-o %s"
|
"-o %s"
|
||||||
),
|
),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("7c01565531cf82c8c03cf042903b96cf")
|
Arrays.asList("41a37636868a838a632559949c5216cf")
|
||||||
);
|
);
|
||||||
executeTest("testModernVCFWithLargeIndels", spec);
|
executeTest("testModernVCFWithLargeIndels", spec);
|
||||||
}
|
}
|
||||||
|
|
@ -506,4 +506,21 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
UserException.class);
|
UserException.class);
|
||||||
executeTest("testIncompatibleEvalAndStrat", spec);
|
executeTest("testIncompatibleEvalAndStrat", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testIncludingAC0(boolean includeAC0, final String md5) {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
buildCommandLine(
|
||||||
|
"-T VariantEval",
|
||||||
|
"-R " + b37KGReference,
|
||||||
|
"-eval " + testDir + "/ac0.vcf",
|
||||||
|
"-L 20:81006 -noST -noEV -EV VariantSummary -o %s" + (includeAC0 ? " -keepAC0" : "")
|
||||||
|
),
|
||||||
|
1,
|
||||||
|
Arrays.asList(md5));
|
||||||
|
executeTest("testIncludingAC0 keep ac 0 = " + includeAC0, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void testWithAC0() { testIncludingAC0(true, "0ed2c8e4b4e06973a06838bc930a132d"); }
|
||||||
|
@Test public void testWithoutAC0() { testIncludingAC0(false, "79d28ddd0ab9584776b6cbefe48331df"); }
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -152,7 +152,9 @@ public class HaplotypeUnitTest extends BaseTest {
|
||||||
final Haplotype h = new Haplotype(hap.getBytes());
|
final Haplotype h = new Haplotype(hap.getBytes());
|
||||||
final Allele h1refAllele = Allele.create(ref, true);
|
final Allele h1refAllele = Allele.create(ref, true);
|
||||||
final Allele h1altAllele = Allele.create(alt, false);
|
final Allele h1altAllele = Allele.create(alt, false);
|
||||||
final Haplotype h1 = new Haplotype( h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE, 0, cigar) );
|
h.setAlignmentStartHapwrtRef(0);
|
||||||
|
h.setCigar(cigar);
|
||||||
|
final Haplotype h1 = new Haplotype( h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE) );
|
||||||
final Haplotype h1expected = new Haplotype(newHap.getBytes());
|
final Haplotype h1expected = new Haplotype(newHap.getBytes());
|
||||||
Assert.assertEquals(h1, h1expected);
|
Assert.assertEquals(h1, h1expected);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue