Fix up and refactor the usage of indel categories. On a VariantContext, isInsertion() and isDeletion() are removed because their previous behavior was wrong for multiallelic sites. The replacement methods isSimpleInsertion() and isSimpleDeletion() return true only if the site is biallelic. For multiallelic indel sites, isComplexIndel() returns true in all cases.

VariantEval module CountVariants is corrected and an additional column is added so that we log mixed events and complex indels separately (before they were being conflated).
VariantEval module IndelStatistics is considerably simplified: its built-in sample stratification was wrong and redundant, so it should now work with the VE-generic Sample stratification instead. Several columns are renamed or removed since they are not really useful.
This commit is contained in:
Guillermo del Angel 2011-08-18 16:17:38 -04:00
parent c193f52e5d
commit 3dfb60a46e
17 changed files with 75 additions and 342 deletions

View File

@ -90,7 +90,7 @@ public class AlleleBalance extends InfoFieldAnnotation {
}
// todo -- actually care about indel length from the pileup (agnostic at the moment)
int refCount = indelPileup.size();
int altCount = vc.isInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
if ( refCount + altCount == 0 ) {
continue;

View File

@ -79,7 +79,7 @@ public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnot
GenomeLoc locus = ref.getLocus();
GenomeLoc window = ref.getWindow();
int refBasePos = (int) (locus.getStart() - window.getStart())+1;
if ( vc.isDeletion() ) {
if ( vc.isSimpleDeletion() ) {
// check that deleted bases are the same
byte dBase = bases[refBasePos];
for ( int i = 0; i < vc.getReference().length(); i ++ ) {

View File

@ -36,9 +36,9 @@ public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnota
if (!vc.isBiallelic())
type = "MULTIALLELIC_INDEL";
else {
if (vc.isInsertion())
if (vc.isSimpleInsertion())
type = "INS.";
else if (vc.isDeletion())
else if (vc.isSimpleDeletion())
type = "DEL.";
else
type = "OTHER.";

View File

@ -101,11 +101,11 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
if ( vc.isFiltered() )
continue;
if ( vc.isDeletion()) {
if ( vc.isSimpleDeletion()) {
deletionBasesRemaining = vc.getReference().length();
// delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if ( vc.isInsertion()) {
} else if ( vc.isSimpleInsertion()) {
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
} else if (vc.isSNP()) {
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());

View File

@ -877,7 +877,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
for ( VariantContext knownIndel : knownIndelsToTry ) {
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
continue;
byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
int start = knownIndel.getStart() - leftmostIndex + 1;
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
if ( c != null )
@ -1079,11 +1079,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
if ( indexOnRef > 0 )
cigar.add(new CigarElement(indexOnRef, CigarOperator.M));
if ( indel.isDeletion() ) {
if ( indel.isSimpleDeletion() ) {
refIdx += indelStr.length;
cigar.add(new CigarElement(indelStr.length, CigarOperator.D));
}
else if ( indel.isInsertion() ) {
else if ( indel.isSimpleInsertion() ) {
for ( byte b : indelStr )
sb.append((char)b);
cigar.add(new CigarElement(indelStr.length, CigarOperator.I));

View File

@ -178,7 +178,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
switch ( vc.getType() ) {
case INDEL:
hasIndel = true;
if ( vc.isInsertion() )
if ( vc.isSimpleInsertion() )
hasInsertion = true;
break;
case SNP:
@ -187,7 +187,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
case MIXED:
hasPointEvent = true;
hasIndel = true;
if ( vc.isInsertion() )
if ( vc.isSimpleInsertion() )
hasInsertion = true;
break;
default:

View File

@ -16,7 +16,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.BaseUtils;
@ -195,17 +194,17 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
} else /* (mask != null && validate == null ) */ {
if ( ! mask.isSNP() && ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphic() )) {
logger.warn("Mask Variant Context on the following warning line is not a SNP. Currently we can only mask out SNPs. This probe will not be designed.");
logger.warn(String.format("%s:%d-%d\t%s\t%s",mask.getChr(),mask.getStart(),mask.getEnd(),mask.isInsertion() ? "INS" : "DEL", Utils.join(",",mask.getAlleles())));
logger.warn(String.format("%s:%d-%d\t%s\t%s",mask.getChr(),mask.getStart(),mask.getEnd(),mask.isSimpleInsertion() ? "INS" : "DEL", Utils.join(",",mask.getAlleles())));
sequenceInvalid = true;
invReason.add(mask.isInsertion() ? "INSERTION" : "DELETION");
invReason.add(mask.isSimpleInsertion() ? "INSERTION" : "DELETION");
// note: indelCounter could be > 0 (could have small deletion within larger one). This always selects
// the larger event.
int indelCounterNew = mask.isInsertion() ? 2 : mask.getEnd()-mask.getStart();
int indelCounterNew = mask.isSimpleInsertion() ? 2 : mask.getEnd()-mask.getStart();
if ( indelCounterNew > indelCounter ) {
indelCounter = indelCounterNew;
}
//sequence.append((char) ref.getBase());
//sequence.append(mask.isInsertion() ? 'I' : 'D');
//sequence.append(mask.isSimpleInsertion() ? 'I' : 'D');
sequence.append("N");
indelCounter--;
rawSequence.append(Character.toUpperCase((char) ref.getBase()));

View File

@ -39,8 +39,10 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
public long nInsertions = 0;
@DataPoint(description = "Number of deletions")
public long nDeletions = 0;
@DataPoint(description = "Number of complex loci")
@DataPoint(description = "Number of complex indels")
public long nComplex = 0;
@DataPoint(description = "Number of mixed loci (loci that can't be classified as a SNP, Indel or MNP)")
public long nMixed = 0;
@DataPoint(description = "Number of no calls loci")
@ -113,11 +115,15 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
break;
case INDEL:
if (vc1.isInsertion()) nInsertions++;
else nDeletions++;
if (vc1.isSimpleInsertion())
nInsertions++;
else if (vc1.isSimpleDeletion())
nDeletions++;
else
nComplex++;
break;
case MIXED:
nComplex++;
nMixed++;
break;
default:
throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
@ -180,8 +186,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
heterozygosity = perLocusRate(nHets);
heterozygosityPerBp = perLocusRInverseRate(nHets);
hetHomRatio = ratio(nHets, nHomVar);
indelRate = perLocusRate(nDeletions + nInsertions);
indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions);
indelRate = perLocusRate(nDeletions + nInsertions + nComplex);
indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions + nComplex);
deletionInsertionRatio = ratio(nDeletions, nInsertions);
}
}

View File

@ -96,9 +96,9 @@ public class IndelLengthHistogram extends VariantEvaluator {
}
if ( vc1.isIndel() ) {
if ( vc1.isInsertion() ) {
if ( vc1.isSimpleInsertion() ) {
indelHistogram.update(vc1.getAlternateAllele(0).length());
} else if ( vc1.isDeletion() ) {
} else if ( vc1.isSimpleDeletion() ) {
indelHistogram.update(-vc1.getReference().length());
} else {
throw new ReviewedStingException("Indel type that is not insertion or deletion.");

View File

@ -1,221 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * VariantEval evaluator that tabulates indel metrics binned by alternate allele count (AC).
 *
 * For every biallelic indel seen in an eval track, the row matching the site's allele count
 * is updated with insertion / deletion / complex counts, a count of long events, and a
 * histogram of event lengths (insertions positive, deletions negative).
 *
 * @author delangel
 * @since Apr 11, 2010
 */
@Analysis(name = "Indel Metrics by allele count", description = "Shows various stats binned by allele count")
public class IndelMetricsByAC extends VariantEvaluator {
    // Table of per-allele-count indel metrics; created lazily on the first call to update1()
    @DataPoint(description = "Indel Metrics by allele count")
    IndelMetricsByAc metrics = null;

    // Number of samples as reported by the walker in initialize()
    int numSamples = 0;

    /**
     * Records the number of samples reported by the walker; it determines the table size.
     *
     * @param walker the VariantEval walker running this evaluator
     */
    public void initialize(VariantEvalWalker walker) {
        numSamples = walker.getNumSamples();
    }

    // Events whose absolute length is >= this limit are counted as "long" instead of binned
    private static final int INDEL_SIZE_LIMIT = 100;
    // Number of leading non-histogram columns (AC, nIns, nDels, n, nComplex, nLong)
    private static final int NUM_SCALAR_COLUMNS = 6;

    /**
     * Maps a signed event length to its index in a size histogram array.
     *
     * @param ind signed event length
     * @return the histogram index for that length
     */
    static int len2Index(int ind) {
        return ind + INDEL_SIZE_LIMIT;
    }

    /**
     * Maps a column index of the output table back to the signed event length it represents.
     *
     * @param ind column index into METRIC_COLUMNS
     * @return the signed event length for that column
     */
    static int index2len(int ind) {
        return ind - INDEL_SIZE_LIMIT - NUM_SCALAR_COLUMNS;
    }

    // Column headers: the scalar columns followed by one column per binned event length
    protected final static String[] METRIC_COLUMNS;

    static {
        METRIC_COLUMNS = new String[NUM_SCALAR_COLUMNS + 2 * INDEL_SIZE_LIMIT + 1];
        METRIC_COLUMNS[0] = "AC";
        METRIC_COLUMNS[1] = "nIns";
        METRIC_COLUMNS[2] = "nDels";
        METRIC_COLUMNS[3] = "n";
        METRIC_COLUMNS[4] = "nComplex";
        METRIC_COLUMNS[5] = "nLong";
        for (int k = NUM_SCALAR_COLUMNS; k < METRIC_COLUMNS.length; k++)
            METRIC_COLUMNS[k] = "indel_size_len" + index2len(k);
    }

    /**
     * Indel metrics accumulated for a single allele count.
     */
    class IndelMetricsAtAC {
        public int ac = -1, nIns = 0, nDel = 0, nComplex = 0, nLong;
        public int sizeCount[] = new int[2 * INDEL_SIZE_LIMIT + 1];

        public IndelMetricsAtAC(int ac) {
            this.ac = ac;
        }

        /**
         * Adds one indel site to this row's counters.
         *
         * @param eval the indel site to count
         */
        public void update(VariantContext eval) {
            // stays 0 for events that are neither an insertion nor a deletion,
            // so those are binned at length 0 below
            int eventLength = 0;
            if (eval.isInsertion()) {
                eventLength = eval.getAlternateAllele(0).length();
                nIns++;
            } else if (eval.isDeletion()) {
                eventLength = -eval.getReference().length();
                nDel++;
            } else {
                nComplex++;
            }

            // make sure the event doesn't overstep the histogram boundaries
            if (Math.abs(eventLength) < INDEL_SIZE_LIMIT)
                sizeCount[len2Index(eventLength)]++;
            else
                nLong++;
        }

        /**
         * Renders one cell of this row.
         *
         * @param i column index, corresponding to METRIC_COLUMNS
         * @return the cell value as a string
         * @throws ReviewedStingException if the column index is not a known column
         */
        public String getColumn(int i) {
            if (i >= NUM_SCALAR_COLUMNS && i <= NUM_SCALAR_COLUMNS + 2 * INDEL_SIZE_LIMIT)
                return String.valueOf(sizeCount[i - NUM_SCALAR_COLUMNS]);

            switch (i) {
                case 0: return String.valueOf(ac);
                case 1: return String.valueOf(nIns);
                case 2: return String.valueOf(nDel);
                case 3: return String.valueOf(nIns + nDel);
                case 4: return String.valueOf(nComplex);
                case 5: return String.valueOf(nLong);
                default:
                    throw new ReviewedStingException("Unexpected column " + i);
            }
        }
    }

    /**
     * Report table with one row per allele count, from 0 up to the number of chromosomes.
     */
    class IndelMetricsByAc implements TableType {
        ArrayList<IndelMetricsAtAC> metrics;
        Object[] rows;

        /**
         * @param nchromosomes largest allele count that gets its own row
         */
        public IndelMetricsByAc(int nchromosomes) {
            rows = new Object[nchromosomes + 1];
            metrics = new ArrayList<IndelMetricsAtAC>(nchromosomes + 1);
            for (int i = 0; i < nchromosomes + 1; i++) {
                metrics.add(new IndelMetricsAtAC(i));
                rows[i] = "ac" + i;
            }
        }

        public Object[] getRowKeys() {
            return rows;
        }

        public Object[] getColumnKeys() {
            return METRIC_COLUMNS;
        }

        public String getName() {
            return "IndelMetricsByAc";
        }

        /**
         * @param ac row index, i.e. the allele count
         * @param y  column index, corresponding to METRIC_COLUMNS
         * @return the cell value as a string
         */
        public String getCell(int ac, int y) {
            return metrics.get(ac).getColumn(y);
        }

        public String toString() {
            return "";
        }

        /**
         * Counts one site in the row for its allele count. The allele count comes from the
         * genotypes when present, otherwise from the "AC" attribute. Sites with neither are
         * ignored, as are sites whose allele count has no row in this table.
         *
         * @param eval the indel site to count
         */
        public void incrValue(VariantContext eval) {
            int ac = -1;
            if (eval.hasGenotypes())
                ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
            else if (eval.hasAttribute("AC"))
                ac = Integer.parseInt(eval.getAttributeAsString("AC"));

            // An allele count outside the table (for example an AC attribute larger than the
            // row count the table was built with) used to raise IndexOutOfBoundsException and
            // abort the run; such sites are skipped instead.
            if (ac >= 0 && ac < metrics.size())
                metrics.get(ac).update(eval);
        }
    }

    public String getName() {
        return "IndelMetricsByAC";
    }

    public int getComparisonOrder() {
        return 1; // we only need to see each eval track
    }

    public boolean enabled() {
        return true;
    }

    public String toString() {
        return getName();
    }

    /**
     * Counts the eval site if it is a biallelic indel. The metrics table is created on the
     * first non-null site, with one row per possible allele count (0 to 2 * numSamples),
     * unless numSamples is -1, in which case no table is built and nothing is counted.
     *
     * @return always null: this module doesn't capture any interesting sites
     */
    public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if (eval != null) {
            if (metrics == null && numSamples != -1)
                metrics = new IndelMetricsByAc(2 * numSamples);

            if (metrics != null && eval.isIndel() && eval.isBiallelic())
                metrics.incrValue(eval);
        }
        return null;
    }
}

View File

@ -44,7 +44,7 @@ public class IndelStatistics extends VariantEvaluator {
@DataPoint(description = "Indel Statistics")
IndelStats indelStats = null;
@DataPoint(description = "Indel Classification")
// @DataPoint(description = "Indel Classification")
IndelClasses indelClasses = null;
int numSamples = 0;
@ -79,8 +79,7 @@ public class IndelStatistics extends VariantEvaluator {
}
static class IndelStats implements TableType {
protected final static String ALL_SAMPLES_KEY = "allSamples";
protected final static String[] COLUMN_KEYS;
protected final static String[] COLUMN_KEYS;
static {
COLUMN_KEYS= new String[NUM_SCALAR_COLUMNS+2*INDEL_SIZE_LIMIT+1];
@ -104,13 +103,10 @@ public class IndelStatistics extends VariantEvaluator {
}
// map of sample to statistics
protected final HashMap<String, int[]> indelSummary = new HashMap<String, int[]>();
protected final int[] indelSummary;
public IndelStats(final VariantContext vc) {
indelSummary.put(ALL_SAMPLES_KEY, new int[COLUMN_KEYS.length]);
for( final String sample : vc.getGenotypes().keySet() ) {
indelSummary.put(sample, new int[COLUMN_KEYS.length]);
}
indelSummary = new int[COLUMN_KEYS.length];
}
/**
@ -118,18 +114,18 @@ public class IndelStatistics extends VariantEvaluator {
* @return one row per sample
*/
public Object[] getRowKeys() {
return indelSummary.keySet().toArray(new String[indelSummary.size()]);
return new String[]{"all"};
}
public Object getCell(int x, int y) {
final Object[] rowKeys = getRowKeys();
if (y == IND_AT_CG_RATIO) {
int at = indelSummary.get(rowKeys[x])[IND_AT_EXP];
int cg = indelSummary.get(rowKeys[x])[IND_CG_EXP];
int at = indelSummary[IND_AT_EXP];
int cg = indelSummary[IND_CG_EXP];
return String.format("%4.2f",((double)at) / (Math.max(cg, 1)));
}
else
return String.format("%d",indelSummary.get(rowKeys[x])[y]);
return String.format("%d",indelSummary[y]);
}
@ -160,78 +156,31 @@ public class IndelStatistics extends VariantEvaluator {
int eventLength = 0;
boolean isInsertion = false, isDeletion = false;
if ( vc.isInsertion() ) {
if ( vc.isSimpleInsertion() ) {
eventLength = vc.getAlternateAllele(0).length();
indelSummary.get(ALL_SAMPLES_KEY)[IND_INS]++;
indelSummary[IND_INS]++;
isInsertion = true;
} else if ( vc.isDeletion() ) {
indelSummary.get(ALL_SAMPLES_KEY)[IND_DEL]++;
} else if ( vc.isSimpleDeletion() ) {
indelSummary[IND_DEL]++;
eventLength = -vc.getReference().length();
isDeletion = true;
}
else {
indelSummary.get(ALL_SAMPLES_KEY)[IND_COMPLEX]++;
indelSummary[IND_COMPLEX]++;
}
if (IndelUtils.isATExpansion(vc,ref))
indelSummary.get(ALL_SAMPLES_KEY)[IND_AT_EXP]++;
indelSummary[IND_AT_EXP]++;
if (IndelUtils.isCGExpansion(vc,ref))
indelSummary.get(ALL_SAMPLES_KEY)[IND_CG_EXP]++;
indelSummary[IND_CG_EXP]++;
// make sure event doesn't overstep array boundaries
if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) {
indelSummary.get(ALL_SAMPLES_KEY)[len2Index(eventLength)]++;
indelSummary[len2Index(eventLength)]++;
if (eventLength % 3 != 0)
indelSummary.get(ALL_SAMPLES_KEY)[IND_FRAMESHIFT]++;
indelSummary[IND_FRAMESHIFT]++;
}
else
indelSummary.get(ALL_SAMPLES_KEY)[IND_LONG]++;
for( final String sample : vc.getGenotypes().keySet() ) {
if ( indelSummary.containsKey(sample) ) {
Genotype g = vc.getGenotype(sample);
boolean isVariant = (g.isCalled() && !g.isHomRef());
if (isVariant) {
// update ins/del count
if (isInsertion) {
indelSummary.get(sample)[IND_INS]++;
}
else if (isDeletion)
indelSummary.get(sample)[IND_DEL]++;
else
indelSummary.get(sample)[IND_COMPLEX]++;
// update histogram
if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) {
indelSummary.get(sample)[len2Index(eventLength)]++;
if (eventLength % 3 != 0)
indelSummary.get(sample)[IND_FRAMESHIFT]++;
}
else
indelSummary.get(sample)[IND_LONG]++;
if (g.isHet())
if (isInsertion)
indelSummary.get(sample)[IND_HET_INS]++;
else if (isDeletion)
indelSummary.get(sample)[IND_HET_DEL]++;
else
if (isInsertion)
indelSummary.get(sample)[IND_HOM_INS]++;
else if (isDeletion)
indelSummary.get(sample)[IND_HOM_DEL]++;
if (IndelUtils.isATExpansion(vc,ref))
indelSummary.get(sample)[IND_AT_EXP]++;
if (IndelUtils.isCGExpansion(vc,ref))
indelSummary.get(sample)[IND_CG_EXP]++;
}
else
indelSummary.get(sample)[IND_HOM_REF]++;
}
}
indelSummary[IND_LONG]++;
}

View File

@ -133,7 +133,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
// get the indel length
int indelLength;
if ( vc.isDeletion() )
if ( vc.isSimpleDeletion() )
indelLength = vc.getReference().length();
else
indelLength = vc.getAlternateAllele(0).length();
@ -150,7 +150,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
// create a CIGAR string to represent the event
ArrayList<CigarElement> elements = new ArrayList<CigarElement>();
elements.add(new CigarElement(originalIndex, CigarOperator.M));
elements.add(new CigarElement(indelLength, vc.isDeletion() ? CigarOperator.D : CigarOperator.I));
elements.add(new CigarElement(indelLength, vc.isSimpleDeletion() ? CigarOperator.D : CigarOperator.I));
elements.add(new CigarElement(refSeq.length - originalIndex, CigarOperator.M));
Cigar originalCigar = new Cigar(elements);
@ -165,8 +165,8 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
int indelIndex = originalIndex-difference;
byte[] newBases = new byte[indelLength];
System.arraycopy((vc.isDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
Allele newAllele = Allele.create(newBases, vc.isDeletion());
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]);
writer.add(newVC);
@ -178,14 +178,14 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
}
private static byte[] makeHaplotype(VariantContext vc, byte[] ref, int indexOfRef, int indelLength) {
byte[] hap = new byte[ref.length + (indelLength * (vc.isDeletion() ? -1 : 1))];
byte[] hap = new byte[ref.length + (indelLength * (vc.isSimpleDeletion() ? -1 : 1))];
// add the bases before the indel
System.arraycopy(ref, 0, hap, 0, indexOfRef);
int currentPos = indexOfRef;
// take care of the indel
if ( vc.isDeletion() ) {
if ( vc.isSimpleDeletion() ) {
indexOfRef += indelLength;
} else {
System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength);

View File

@ -137,11 +137,11 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
Allele reportedRefAllele = vc.getReference();
Allele observedRefAllele;
// insertions
if ( vc.isInsertion() ) {
if ( vc.isSimpleInsertion() ) {
observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
}
// deletions
else if ( vc.isDeletion() || vc.isMixed() || vc.isMNP() ) {
else if ( vc.isSimpleDeletion() || vc.isMixed() || vc.isMNP() ) {
// we can't validate arbitrarily long deletions
if ( reportedRefAllele.length() > 100 ) {
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));

View File

@ -168,8 +168,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
continue;
Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isInsertion()));
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isInsertion()));
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion()));
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
hapmap.setActualAlleles(alleleMap);
// also, use the correct positioning for insertions

View File

@ -121,9 +121,9 @@ public class IndelUtils {
boolean done = false;
ArrayList<Integer> inds = new ArrayList<Integer>();
if ( vc.isInsertion() ) {
if ( vc.isSimpleInsertion() ) {
indelAlleleString = vc.getAlternateAllele(0).getDisplayString();
} else if ( vc.isDeletion() ) {
} else if ( vc.isSimpleDeletion() ) {
indelAlleleString = vc.getReference().getDisplayString();
}
else {

View File

@ -583,24 +583,24 @@ public class VariantContext implements Feature { // to enable tribble intergrati
/**
* @return true if the alleles indicate a simple insertion (i.e., the reference allele is Null)
*/
public boolean isInsertion() {
// can't just call !isDeletion() because of complex indels
return getType() == Type.INDEL && getReference().isNull();
public boolean isSimpleInsertion() {
// can't just call !isSimpleDeletion() because of complex indels
return getType() == Type.INDEL && getReference().isNull() && isBiallelic();
}
/**
* @return true if the alleles indicate a simple deletion (i.e., a single alt allele that is Null)
*/
public boolean isDeletion() {
// can't just call !isInsertion() because of complex indels
return getType() == Type.INDEL && getAlternateAllele(0).isNull();
public boolean isSimpleDeletion() {
// can't just call !isSimpleInsertion() because of complex indels
return getType() == Type.INDEL && getAlternateAllele(0).isNull() && isBiallelic();
}
/**
* @return true if the alleles indicate neither a simple deletion nor a simple insertion
*/
public boolean isComplexIndel() {
return isIndel() && !isDeletion() && !isInsertion();
return isIndel() && !isSimpleDeletion() && !isSimpleInsertion();
}
public boolean isSymbolic() {

View File

@ -146,8 +146,8 @@ public class VariantContextUnitTest {
Assert.assertEquals(vc.getType(), VariantContext.Type.SNP);
Assert.assertTrue(vc.isSNP());
Assert.assertFalse(vc.isIndel());
Assert.assertFalse(vc.isInsertion());
Assert.assertFalse(vc.isDeletion());
Assert.assertFalse(vc.isSimpleInsertion());
Assert.assertFalse(vc.isSimpleDeletion());
Assert.assertFalse(vc.isMixed());
Assert.assertTrue(vc.isBiallelic());
Assert.assertEquals(vc.getNAlleles(), 2);
@ -173,8 +173,8 @@ public class VariantContextUnitTest {
Assert.assertEquals(VariantContext.Type.NO_VARIATION, vc.getType());
Assert.assertFalse(vc.isSNP());
Assert.assertFalse(vc.isIndel());
Assert.assertFalse(vc.isInsertion());
Assert.assertFalse(vc.isDeletion());
Assert.assertFalse(vc.isSimpleInsertion());
Assert.assertFalse(vc.isSimpleDeletion());
Assert.assertFalse(vc.isMixed());
Assert.assertFalse(vc.isBiallelic());
Assert.assertEquals(vc.getNAlleles(), 1);
@ -199,8 +199,8 @@ public class VariantContextUnitTest {
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
Assert.assertFalse(vc.isSNP());
Assert.assertTrue(vc.isIndel());
Assert.assertFalse(vc.isInsertion());
Assert.assertTrue(vc.isDeletion());
Assert.assertFalse(vc.isSimpleInsertion());
Assert.assertTrue(vc.isSimpleDeletion());
Assert.assertFalse(vc.isMixed());
Assert.assertTrue(vc.isBiallelic());
Assert.assertEquals(vc.getNAlleles(), 2);
@ -226,8 +226,8 @@ public class VariantContextUnitTest {
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
Assert.assertFalse(vc.isSNP());
Assert.assertTrue(vc.isIndel());
Assert.assertTrue(vc.isInsertion());
Assert.assertFalse(vc.isDeletion());
Assert.assertTrue(vc.isSimpleInsertion());
Assert.assertFalse(vc.isSimpleDeletion());
Assert.assertFalse(vc.isMixed());
Assert.assertTrue(vc.isBiallelic());
Assert.assertEquals(vc.getNAlleles(), 2);
@ -433,7 +433,7 @@ public class VariantContextUnitTest {
Assert.assertFalse(vc14.isBiallelic());
Assert.assertTrue(vc5.isIndel());
Assert.assertTrue(vc5.isDeletion());
Assert.assertTrue(vc5.isSimpleDeletion());
Assert.assertTrue(vc5.isVariant());
Assert.assertTrue(vc5.isBiallelic());