Fix and refactor the usage of indel categories. On a VariantContext, isInsertion() and isDeletion() are removed because their previous behavior was wrong for multiallelic sites. The new methods isSimpleInsertion() and isSimpleDeletion() return true only if the site is biallelic. For multiallelic indel sites, isComplexIndel() returns true in all cases.
The VariantEval module CountVariants is corrected, and an additional column is added so that mixed events and complex indels are logged separately (previously they were conflated). The VariantEval module IndelStatistics is considerably simplified because its sample stratification was wrong and redundant; it should now work with the VE-generic Sample stratification. Several columns are renamed or removed since they are not really useful.
This commit is contained in:
parent
c193f52e5d
commit
3dfb60a46e
|
|
@ -90,7 +90,7 @@ public class AlleleBalance extends InfoFieldAnnotation {
|
|||
}
|
||||
// todo -- actually care about indel length from the pileup (agnostic at the moment)
|
||||
int refCount = indelPileup.size();
|
||||
int altCount = vc.isInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
|
||||
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
|
||||
|
||||
if ( refCount + altCount == 0 ) {
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnot
|
|||
GenomeLoc locus = ref.getLocus();
|
||||
GenomeLoc window = ref.getWindow();
|
||||
int refBasePos = (int) (locus.getStart() - window.getStart())+1;
|
||||
if ( vc.isDeletion() ) {
|
||||
if ( vc.isSimpleDeletion() ) {
|
||||
// check that deleted bases are the same
|
||||
byte dBase = bases[refBasePos];
|
||||
for ( int i = 0; i < vc.getReference().length(); i ++ ) {
|
||||
|
|
|
|||
|
|
@ -36,9 +36,9 @@ public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnota
|
|||
if (!vc.isBiallelic())
|
||||
type = "MULTIALLELIC_INDEL";
|
||||
else {
|
||||
if (vc.isInsertion())
|
||||
if (vc.isSimpleInsertion())
|
||||
type = "INS.";
|
||||
else if (vc.isDeletion())
|
||||
else if (vc.isSimpleDeletion())
|
||||
type = "DEL.";
|
||||
else
|
||||
type = "OTHER.";
|
||||
|
|
|
|||
|
|
@ -101,11 +101,11 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
|||
if ( vc.isFiltered() )
|
||||
continue;
|
||||
|
||||
if ( vc.isDeletion()) {
|
||||
if ( vc.isSimpleDeletion()) {
|
||||
deletionBasesRemaining = vc.getReference().length();
|
||||
// delete the next n bases, not this one
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
||||
} else if ( vc.isInsertion()) {
|
||||
} else if ( vc.isSimpleInsertion()) {
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
|
||||
} else if (vc.isSNP()) {
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
||||
|
|
|
|||
|
|
@ -877,7 +877,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
for ( VariantContext knownIndel : knownIndelsToTry ) {
|
||||
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
|
||||
continue;
|
||||
byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
|
||||
byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
|
||||
int start = knownIndel.getStart() - leftmostIndex + 1;
|
||||
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
|
||||
if ( c != null )
|
||||
|
|
@ -1079,11 +1079,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
if ( indexOnRef > 0 )
|
||||
cigar.add(new CigarElement(indexOnRef, CigarOperator.M));
|
||||
|
||||
if ( indel.isDeletion() ) {
|
||||
if ( indel.isSimpleDeletion() ) {
|
||||
refIdx += indelStr.length;
|
||||
cigar.add(new CigarElement(indelStr.length, CigarOperator.D));
|
||||
}
|
||||
else if ( indel.isInsertion() ) {
|
||||
else if ( indel.isSimpleInsertion() ) {
|
||||
for ( byte b : indelStr )
|
||||
sb.append((char)b);
|
||||
cigar.add(new CigarElement(indelStr.length, CigarOperator.I));
|
||||
|
|
|
|||
|
|
@ -178,7 +178,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
|||
switch ( vc.getType() ) {
|
||||
case INDEL:
|
||||
hasIndel = true;
|
||||
if ( vc.isInsertion() )
|
||||
if ( vc.isSimpleInsertion() )
|
||||
hasInsertion = true;
|
||||
break;
|
||||
case SNP:
|
||||
|
|
@ -187,7 +187,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
|||
case MIXED:
|
||||
hasPointEvent = true;
|
||||
hasIndel = true;
|
||||
if ( vc.isInsertion() )
|
||||
if ( vc.isSimpleInsertion() )
|
||||
hasInsertion = true;
|
||||
break;
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -16,7 +16,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
|
@ -195,17 +194,17 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
|||
} else /* (mask != null && validate == null ) */ {
|
||||
if ( ! mask.isSNP() && ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphic() )) {
|
||||
logger.warn("Mask Variant Context on the following warning line is not a SNP. Currently we can only mask out SNPs. This probe will not be designed.");
|
||||
logger.warn(String.format("%s:%d-%d\t%s\t%s",mask.getChr(),mask.getStart(),mask.getEnd(),mask.isInsertion() ? "INS" : "DEL", Utils.join(",",mask.getAlleles())));
|
||||
logger.warn(String.format("%s:%d-%d\t%s\t%s",mask.getChr(),mask.getStart(),mask.getEnd(),mask.isSimpleInsertion() ? "INS" : "DEL", Utils.join(",",mask.getAlleles())));
|
||||
sequenceInvalid = true;
|
||||
invReason.add(mask.isInsertion() ? "INSERTION" : "DELETION");
|
||||
invReason.add(mask.isSimpleInsertion() ? "INSERTION" : "DELETION");
|
||||
// note: indelCounter could be > 0 (could have small deletion within larger one). This always selects
|
||||
// the larger event.
|
||||
int indelCounterNew = mask.isInsertion() ? 2 : mask.getEnd()-mask.getStart();
|
||||
int indelCounterNew = mask.isSimpleInsertion() ? 2 : mask.getEnd()-mask.getStart();
|
||||
if ( indelCounterNew > indelCounter ) {
|
||||
indelCounter = indelCounterNew;
|
||||
}
|
||||
//sequence.append((char) ref.getBase());
|
||||
//sequence.append(mask.isInsertion() ? 'I' : 'D');
|
||||
//sequence.append(mask.isSimpleInsertion() ? 'I' : 'D');
|
||||
sequence.append("N");
|
||||
indelCounter--;
|
||||
rawSequence.append(Character.toUpperCase((char) ref.getBase()));
|
||||
|
|
|
|||
|
|
@ -39,8 +39,10 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
public long nInsertions = 0;
|
||||
@DataPoint(description = "Number of deletions")
|
||||
public long nDeletions = 0;
|
||||
@DataPoint(description = "Number of complex loci")
|
||||
@DataPoint(description = "Number of complex indels")
|
||||
public long nComplex = 0;
|
||||
@DataPoint(description = "Number of mixed loci (loci that can't be classified as a SNP, Indel or MNP)")
|
||||
public long nMixed = 0;
|
||||
|
||||
|
||||
@DataPoint(description = "Number of no calls loci")
|
||||
|
|
@ -113,11 +115,15 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
|
||||
break;
|
||||
case INDEL:
|
||||
if (vc1.isInsertion()) nInsertions++;
|
||||
else nDeletions++;
|
||||
if (vc1.isSimpleInsertion())
|
||||
nInsertions++;
|
||||
else if (vc1.isSimpleDeletion())
|
||||
nDeletions++;
|
||||
else
|
||||
nComplex++;
|
||||
break;
|
||||
case MIXED:
|
||||
nComplex++;
|
||||
nMixed++;
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
|
||||
|
|
@ -180,8 +186,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
heterozygosity = perLocusRate(nHets);
|
||||
heterozygosityPerBp = perLocusRInverseRate(nHets);
|
||||
hetHomRatio = ratio(nHets, nHomVar);
|
||||
indelRate = perLocusRate(nDeletions + nInsertions);
|
||||
indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions);
|
||||
indelRate = perLocusRate(nDeletions + nInsertions + nComplex);
|
||||
indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions + nComplex);
|
||||
deletionInsertionRatio = ratio(nDeletions, nInsertions);
|
||||
}
|
||||
}
|
||||
|
|
@ -96,9 +96,9 @@ public class IndelLengthHistogram extends VariantEvaluator {
|
|||
}
|
||||
|
||||
if ( vc1.isIndel() ) {
|
||||
if ( vc1.isInsertion() ) {
|
||||
if ( vc1.isSimpleInsertion() ) {
|
||||
indelHistogram.update(vc1.getAlternateAllele(0).length());
|
||||
} else if ( vc1.isDeletion() ) {
|
||||
} else if ( vc1.isSimpleDeletion() ) {
|
||||
indelHistogram.update(-vc1.getReference().length());
|
||||
} else {
|
||||
throw new ReviewedStingException("Indel type that is not insertion or deletion.");
|
||||
|
|
|
|||
|
|
@ -1,221 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author delangel
|
||||
* @since Apr 11, 2010
|
||||
*/
|
||||
|
||||
@Analysis(name = "Indel Metrics by allele count", description = "Shows various stats binned by allele count")
|
||||
public class IndelMetricsByAC extends VariantEvaluator {
|
||||
// a mapping from quality score histogram bin to Ti/Tv ratio
|
||||
@DataPoint(description = "Indel Metrics by allele count")
|
||||
IndelMetricsByAc metrics = null;
|
||||
|
||||
int numSamples = 0;
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
numSamples = walker.getNumSamples();
|
||||
}
|
||||
|
||||
//@DataPoint(name="Quality by Allele Count", description = "average variant quality for each allele count")
|
||||
//AlleleCountStats alleleCountStats = null;
|
||||
private static final int INDEL_SIZE_LIMIT = 100;
|
||||
private static final int NUM_SCALAR_COLUMNS = 6;
|
||||
static int len2Index(int ind) {
|
||||
return ind+INDEL_SIZE_LIMIT;
|
||||
}
|
||||
|
||||
static int index2len(int ind) {
|
||||
return ind-INDEL_SIZE_LIMIT-NUM_SCALAR_COLUMNS;
|
||||
}
|
||||
|
||||
protected final static String[] METRIC_COLUMNS;
|
||||
static {
|
||||
METRIC_COLUMNS= new String[NUM_SCALAR_COLUMNS+2*INDEL_SIZE_LIMIT+1];
|
||||
METRIC_COLUMNS[0] = "AC";
|
||||
METRIC_COLUMNS[1] = "nIns";
|
||||
METRIC_COLUMNS[2] = "nDels";
|
||||
METRIC_COLUMNS[3] = "n";
|
||||
METRIC_COLUMNS[4] = "nComplex";
|
||||
METRIC_COLUMNS[5] = "nLong";
|
||||
|
||||
for (int k=NUM_SCALAR_COLUMNS; k < NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT+1; k++)
|
||||
METRIC_COLUMNS[k] = "indel_size_len"+Integer.valueOf(index2len(k));
|
||||
}
|
||||
|
||||
class IndelMetricsAtAC {
|
||||
public int ac = -1, nIns =0, nDel = 0, nComplex = 0, nLong;
|
||||
public int sizeCount[] = new int[2*INDEL_SIZE_LIMIT+1];
|
||||
|
||||
public IndelMetricsAtAC(int ac) { this.ac = ac; }
|
||||
|
||||
public void update(VariantContext eval) {
|
||||
int eventLength = 0;
|
||||
if ( eval.isInsertion() ) {
|
||||
eventLength = eval.getAlternateAllele(0).length();
|
||||
nIns++;
|
||||
} else if ( eval.isDeletion() ) {
|
||||
eventLength = -eval.getReference().length();
|
||||
nDel++;
|
||||
}
|
||||
else {
|
||||
nComplex++;
|
||||
}
|
||||
if (Math.abs(eventLength) < INDEL_SIZE_LIMIT)
|
||||
sizeCount[len2Index(eventLength)]++;
|
||||
else
|
||||
nLong++;
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
// corresponding to METRIC_COLUMNS
|
||||
public String getColumn(int i) {
|
||||
if (i >= NUM_SCALAR_COLUMNS && i <=NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT)
|
||||
return String.valueOf(sizeCount[i-NUM_SCALAR_COLUMNS]);
|
||||
|
||||
switch (i) {
|
||||
case 0: return String.valueOf(ac);
|
||||
case 1: return String.valueOf(nIns);
|
||||
case 2: return String.valueOf(nDel);
|
||||
case 3: return String.valueOf(nIns + nDel);
|
||||
case 4: return String.valueOf(nComplex);
|
||||
case 5: return String.valueOf(nLong);
|
||||
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected column " + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class IndelMetricsByAc implements TableType {
|
||||
ArrayList<IndelMetricsAtAC> metrics = new ArrayList<IndelMetricsAtAC>();
|
||||
Object[] rows = null;
|
||||
|
||||
public IndelMetricsByAc( int nchromosomes ) {
|
||||
rows = new Object[nchromosomes+1];
|
||||
metrics = new ArrayList<IndelMetricsAtAC>(nchromosomes+1);
|
||||
for ( int i = 0; i < nchromosomes + 1; i++ ) {
|
||||
metrics.add(new IndelMetricsAtAC(i));
|
||||
rows[i] = "ac" + i;
|
||||
}
|
||||
}
|
||||
|
||||
public Object[] getRowKeys() {
|
||||
return rows;
|
||||
}
|
||||
|
||||
public Object[] getColumnKeys() {
|
||||
return METRIC_COLUMNS;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "IndelMetricsByAc";
|
||||
}
|
||||
|
||||
//
|
||||
public String getCell(int ac, int y) {
|
||||
return metrics.get(ac).getColumn(y);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "";
|
||||
}
|
||||
|
||||
public void incrValue( VariantContext eval ) {
|
||||
int ac = -1;
|
||||
|
||||
if ( eval.hasGenotypes() )
|
||||
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
|
||||
else if ( eval.hasAttribute("AC") ) {
|
||||
ac = Integer.valueOf(eval.getAttributeAsString("AC"));
|
||||
}
|
||||
|
||||
if ( ac != -1 )
|
||||
metrics.get(ac).update(eval);
|
||||
}
|
||||
}
|
||||
|
||||
//public IndelMetricsByAC(VariantEvalWalker parent) {
|
||||
//super(parent);
|
||||
// don't do anything
|
||||
//}
|
||||
|
||||
public String getName() {
|
||||
return "IndelMetricsByAC";
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 1; // we only need to see each eval track
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return getName();
|
||||
}
|
||||
|
||||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
final String interesting = null;
|
||||
|
||||
if (eval != null ) {
|
||||
if ( metrics == null ) {
|
||||
int nSamples = numSamples;
|
||||
//int nSamples = 2;
|
||||
if ( nSamples != -1 )
|
||||
metrics = new IndelMetricsByAc(2 * nSamples);
|
||||
}
|
||||
|
||||
if ( eval.isIndel() && eval.isBiallelic() &&
|
||||
metrics != null ) {
|
||||
metrics.incrValue(eval);
|
||||
}
|
||||
}
|
||||
|
||||
return interesting; // This module doesn't capture any interesting sites, so return null
|
||||
}
|
||||
|
||||
//public void finalizeEvaluation() {
|
||||
//
|
||||
//}
|
||||
}
|
||||
|
|
@ -44,7 +44,7 @@ public class IndelStatistics extends VariantEvaluator {
|
|||
@DataPoint(description = "Indel Statistics")
|
||||
IndelStats indelStats = null;
|
||||
|
||||
@DataPoint(description = "Indel Classification")
|
||||
// @DataPoint(description = "Indel Classification")
|
||||
IndelClasses indelClasses = null;
|
||||
|
||||
int numSamples = 0;
|
||||
|
|
@ -79,8 +79,7 @@ public class IndelStatistics extends VariantEvaluator {
|
|||
}
|
||||
|
||||
static class IndelStats implements TableType {
|
||||
protected final static String ALL_SAMPLES_KEY = "allSamples";
|
||||
protected final static String[] COLUMN_KEYS;
|
||||
protected final static String[] COLUMN_KEYS;
|
||||
|
||||
static {
|
||||
COLUMN_KEYS= new String[NUM_SCALAR_COLUMNS+2*INDEL_SIZE_LIMIT+1];
|
||||
|
|
@ -104,13 +103,10 @@ public class IndelStatistics extends VariantEvaluator {
|
|||
}
|
||||
|
||||
// map of sample to statistics
|
||||
protected final HashMap<String, int[]> indelSummary = new HashMap<String, int[]>();
|
||||
protected final int[] indelSummary;
|
||||
|
||||
public IndelStats(final VariantContext vc) {
|
||||
indelSummary.put(ALL_SAMPLES_KEY, new int[COLUMN_KEYS.length]);
|
||||
for( final String sample : vc.getGenotypes().keySet() ) {
|
||||
indelSummary.put(sample, new int[COLUMN_KEYS.length]);
|
||||
}
|
||||
indelSummary = new int[COLUMN_KEYS.length];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -118,18 +114,18 @@ public class IndelStatistics extends VariantEvaluator {
|
|||
* @return one row per sample
|
||||
*/
|
||||
public Object[] getRowKeys() {
|
||||
return indelSummary.keySet().toArray(new String[indelSummary.size()]);
|
||||
return new String[]{"all"};
|
||||
}
|
||||
public Object getCell(int x, int y) {
|
||||
final Object[] rowKeys = getRowKeys();
|
||||
if (y == IND_AT_CG_RATIO) {
|
||||
|
||||
int at = indelSummary.get(rowKeys[x])[IND_AT_EXP];
|
||||
int cg = indelSummary.get(rowKeys[x])[IND_CG_EXP];
|
||||
int at = indelSummary[IND_AT_EXP];
|
||||
int cg = indelSummary[IND_CG_EXP];
|
||||
return String.format("%4.2f",((double)at) / (Math.max(cg, 1)));
|
||||
}
|
||||
else
|
||||
return String.format("%d",indelSummary.get(rowKeys[x])[y]);
|
||||
return String.format("%d",indelSummary[y]);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -160,78 +156,31 @@ public class IndelStatistics extends VariantEvaluator {
|
|||
int eventLength = 0;
|
||||
boolean isInsertion = false, isDeletion = false;
|
||||
|
||||
if ( vc.isInsertion() ) {
|
||||
if ( vc.isSimpleInsertion() ) {
|
||||
eventLength = vc.getAlternateAllele(0).length();
|
||||
indelSummary.get(ALL_SAMPLES_KEY)[IND_INS]++;
|
||||
indelSummary[IND_INS]++;
|
||||
isInsertion = true;
|
||||
} else if ( vc.isDeletion() ) {
|
||||
indelSummary.get(ALL_SAMPLES_KEY)[IND_DEL]++;
|
||||
} else if ( vc.isSimpleDeletion() ) {
|
||||
indelSummary[IND_DEL]++;
|
||||
eventLength = -vc.getReference().length();
|
||||
isDeletion = true;
|
||||
}
|
||||
else {
|
||||
indelSummary.get(ALL_SAMPLES_KEY)[IND_COMPLEX]++;
|
||||
indelSummary[IND_COMPLEX]++;
|
||||
}
|
||||
if (IndelUtils.isATExpansion(vc,ref))
|
||||
indelSummary.get(ALL_SAMPLES_KEY)[IND_AT_EXP]++;
|
||||
indelSummary[IND_AT_EXP]++;
|
||||
if (IndelUtils.isCGExpansion(vc,ref))
|
||||
indelSummary.get(ALL_SAMPLES_KEY)[IND_CG_EXP]++;
|
||||
indelSummary[IND_CG_EXP]++;
|
||||
|
||||
// make sure event doesn't overstep array boundaries
|
||||
if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) {
|
||||
indelSummary.get(ALL_SAMPLES_KEY)[len2Index(eventLength)]++;
|
||||
indelSummary[len2Index(eventLength)]++;
|
||||
if (eventLength % 3 != 0)
|
||||
indelSummary.get(ALL_SAMPLES_KEY)[IND_FRAMESHIFT]++;
|
||||
indelSummary[IND_FRAMESHIFT]++;
|
||||
}
|
||||
else
|
||||
indelSummary.get(ALL_SAMPLES_KEY)[IND_LONG]++;
|
||||
|
||||
|
||||
for( final String sample : vc.getGenotypes().keySet() ) {
|
||||
if ( indelSummary.containsKey(sample) ) {
|
||||
Genotype g = vc.getGenotype(sample);
|
||||
boolean isVariant = (g.isCalled() && !g.isHomRef());
|
||||
if (isVariant) {
|
||||
// update ins/del count
|
||||
if (isInsertion) {
|
||||
indelSummary.get(sample)[IND_INS]++;
|
||||
}
|
||||
else if (isDeletion)
|
||||
indelSummary.get(sample)[IND_DEL]++;
|
||||
else
|
||||
indelSummary.get(sample)[IND_COMPLEX]++;
|
||||
|
||||
// update histogram
|
||||
if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) {
|
||||
indelSummary.get(sample)[len2Index(eventLength)]++;
|
||||
if (eventLength % 3 != 0)
|
||||
indelSummary.get(sample)[IND_FRAMESHIFT]++;
|
||||
}
|
||||
else
|
||||
indelSummary.get(sample)[IND_LONG]++;
|
||||
|
||||
if (g.isHet())
|
||||
if (isInsertion)
|
||||
indelSummary.get(sample)[IND_HET_INS]++;
|
||||
else if (isDeletion)
|
||||
indelSummary.get(sample)[IND_HET_DEL]++;
|
||||
else
|
||||
if (isInsertion)
|
||||
indelSummary.get(sample)[IND_HOM_INS]++;
|
||||
else if (isDeletion)
|
||||
indelSummary.get(sample)[IND_HOM_DEL]++;
|
||||
|
||||
if (IndelUtils.isATExpansion(vc,ref))
|
||||
indelSummary.get(sample)[IND_AT_EXP]++;
|
||||
if (IndelUtils.isCGExpansion(vc,ref))
|
||||
indelSummary.get(sample)[IND_CG_EXP]++;
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
indelSummary.get(sample)[IND_HOM_REF]++;
|
||||
}
|
||||
}
|
||||
indelSummary[IND_LONG]++;
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
// get the indel length
|
||||
int indelLength;
|
||||
if ( vc.isDeletion() )
|
||||
if ( vc.isSimpleDeletion() )
|
||||
indelLength = vc.getReference().length();
|
||||
else
|
||||
indelLength = vc.getAlternateAllele(0).length();
|
||||
|
|
@ -150,7 +150,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
// create a CIGAR string to represent the event
|
||||
ArrayList<CigarElement> elements = new ArrayList<CigarElement>();
|
||||
elements.add(new CigarElement(originalIndex, CigarOperator.M));
|
||||
elements.add(new CigarElement(indelLength, vc.isDeletion() ? CigarOperator.D : CigarOperator.I));
|
||||
elements.add(new CigarElement(indelLength, vc.isSimpleDeletion() ? CigarOperator.D : CigarOperator.I));
|
||||
elements.add(new CigarElement(refSeq.length - originalIndex, CigarOperator.M));
|
||||
Cigar originalCigar = new Cigar(elements);
|
||||
|
||||
|
|
@ -165,8 +165,8 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
int indelIndex = originalIndex-difference;
|
||||
byte[] newBases = new byte[indelLength];
|
||||
System.arraycopy((vc.isDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
|
||||
Allele newAllele = Allele.create(newBases, vc.isDeletion());
|
||||
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
|
||||
Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
|
||||
newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]);
|
||||
|
||||
writer.add(newVC);
|
||||
|
|
@ -178,14 +178,14 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
private static byte[] makeHaplotype(VariantContext vc, byte[] ref, int indexOfRef, int indelLength) {
|
||||
byte[] hap = new byte[ref.length + (indelLength * (vc.isDeletion() ? -1 : 1))];
|
||||
byte[] hap = new byte[ref.length + (indelLength * (vc.isSimpleDeletion() ? -1 : 1))];
|
||||
|
||||
// add the bases before the indel
|
||||
System.arraycopy(ref, 0, hap, 0, indexOfRef);
|
||||
int currentPos = indexOfRef;
|
||||
|
||||
// take care of the indel
|
||||
if ( vc.isDeletion() ) {
|
||||
if ( vc.isSimpleDeletion() ) {
|
||||
indexOfRef += indelLength;
|
||||
} else {
|
||||
System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength);
|
||||
|
|
|
|||
|
|
@ -137,11 +137,11 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
Allele reportedRefAllele = vc.getReference();
|
||||
Allele observedRefAllele;
|
||||
// insertions
|
||||
if ( vc.isInsertion() ) {
|
||||
if ( vc.isSimpleInsertion() ) {
|
||||
observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
|
||||
}
|
||||
// deletions
|
||||
else if ( vc.isDeletion() || vc.isMixed() || vc.isMNP() ) {
|
||||
else if ( vc.isSimpleDeletion() || vc.isMixed() || vc.isMNP() ) {
|
||||
// we can't validate arbitrarily long deletions
|
||||
if ( reportedRefAllele.length() > 100 ) {
|
||||
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
|
||||
|
|
|
|||
|
|
@ -168,8 +168,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
continue;
|
||||
|
||||
Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
|
||||
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isInsertion()));
|
||||
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isInsertion()));
|
||||
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion()));
|
||||
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
|
||||
hapmap.setActualAlleles(alleleMap);
|
||||
|
||||
// also, use the correct positioning for insertions
|
||||
|
|
|
|||
|
|
@ -121,9 +121,9 @@ public class IndelUtils {
|
|||
boolean done = false;
|
||||
|
||||
ArrayList<Integer> inds = new ArrayList<Integer>();
|
||||
if ( vc.isInsertion() ) {
|
||||
if ( vc.isSimpleInsertion() ) {
|
||||
indelAlleleString = vc.getAlternateAllele(0).getDisplayString();
|
||||
} else if ( vc.isDeletion() ) {
|
||||
} else if ( vc.isSimpleDeletion() ) {
|
||||
indelAlleleString = vc.getReference().getDisplayString();
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -583,24 +583,24 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
/**
|
||||
* @return true if the alleles indicate a simple insertion (i.e., the site is biallelic and the reference allele is Null)
|
||||
*/
|
||||
public boolean isInsertion() {
|
||||
// can't just call !isDeletion() because of complex indels
|
||||
return getType() == Type.INDEL && getReference().isNull();
|
||||
public boolean isSimpleInsertion() {
|
||||
// can't just call !isSimpleDeletion() because of complex indels
|
||||
return getType() == Type.INDEL && getReference().isNull() && isBiallelic();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the alleles indicate a simple deletion (i.e., a single alt allele that is Null)
|
||||
*/
|
||||
public boolean isDeletion() {
|
||||
// can't just call !isInsertion() because of complex indels
|
||||
return getType() == Type.INDEL && getAlternateAllele(0).isNull();
|
||||
public boolean isSimpleDeletion() {
|
||||
// can't just call !isSimpleInsertion() because of complex indels
|
||||
return getType() == Type.INDEL && getAlternateAllele(0).isNull() && isBiallelic();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the alleles indicate neither a simple deletion nor a simple insertion
|
||||
*/
|
||||
public boolean isComplexIndel() {
|
||||
return isIndel() && !isDeletion() && !isInsertion();
|
||||
return isIndel() && !isSimpleDeletion() && !isSimpleInsertion();
|
||||
}
|
||||
|
||||
public boolean isSymbolic() {
|
||||
|
|
|
|||
|
|
@ -146,8 +146,8 @@ public class VariantContextUnitTest {
|
|||
Assert.assertEquals(vc.getType(), VariantContext.Type.SNP);
|
||||
Assert.assertTrue(vc.isSNP());
|
||||
Assert.assertFalse(vc.isIndel());
|
||||
Assert.assertFalse(vc.isInsertion());
|
||||
Assert.assertFalse(vc.isDeletion());
|
||||
Assert.assertFalse(vc.isSimpleInsertion());
|
||||
Assert.assertFalse(vc.isSimpleDeletion());
|
||||
Assert.assertFalse(vc.isMixed());
|
||||
Assert.assertTrue(vc.isBiallelic());
|
||||
Assert.assertEquals(vc.getNAlleles(), 2);
|
||||
|
|
@ -173,8 +173,8 @@ public class VariantContextUnitTest {
|
|||
Assert.assertEquals(VariantContext.Type.NO_VARIATION, vc.getType());
|
||||
Assert.assertFalse(vc.isSNP());
|
||||
Assert.assertFalse(vc.isIndel());
|
||||
Assert.assertFalse(vc.isInsertion());
|
||||
Assert.assertFalse(vc.isDeletion());
|
||||
Assert.assertFalse(vc.isSimpleInsertion());
|
||||
Assert.assertFalse(vc.isSimpleDeletion());
|
||||
Assert.assertFalse(vc.isMixed());
|
||||
Assert.assertFalse(vc.isBiallelic());
|
||||
Assert.assertEquals(vc.getNAlleles(), 1);
|
||||
|
|
@ -199,8 +199,8 @@ public class VariantContextUnitTest {
|
|||
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
|
||||
Assert.assertFalse(vc.isSNP());
|
||||
Assert.assertTrue(vc.isIndel());
|
||||
Assert.assertFalse(vc.isInsertion());
|
||||
Assert.assertTrue(vc.isDeletion());
|
||||
Assert.assertFalse(vc.isSimpleInsertion());
|
||||
Assert.assertTrue(vc.isSimpleDeletion());
|
||||
Assert.assertFalse(vc.isMixed());
|
||||
Assert.assertTrue(vc.isBiallelic());
|
||||
Assert.assertEquals(vc.getNAlleles(), 2);
|
||||
|
|
@ -226,8 +226,8 @@ public class VariantContextUnitTest {
|
|||
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
|
||||
Assert.assertFalse(vc.isSNP());
|
||||
Assert.assertTrue(vc.isIndel());
|
||||
Assert.assertTrue(vc.isInsertion());
|
||||
Assert.assertFalse(vc.isDeletion());
|
||||
Assert.assertTrue(vc.isSimpleInsertion());
|
||||
Assert.assertFalse(vc.isSimpleDeletion());
|
||||
Assert.assertFalse(vc.isMixed());
|
||||
Assert.assertTrue(vc.isBiallelic());
|
||||
Assert.assertEquals(vc.getNAlleles(), 2);
|
||||
|
|
@ -433,7 +433,7 @@ public class VariantContextUnitTest {
|
|||
Assert.assertFalse(vc14.isBiallelic());
|
||||
|
||||
Assert.assertTrue(vc5.isIndel());
|
||||
Assert.assertTrue(vc5.isDeletion());
|
||||
Assert.assertTrue(vc5.isSimpleDeletion());
|
||||
Assert.assertTrue(vc5.isVariant());
|
||||
Assert.assertTrue(vc5.isBiallelic());
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue