From 3dfb60a46e2d150cfde3169bfd516df604dbb2f9 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 18 Aug 2011 16:17:38 -0400 Subject: [PATCH 01/11] Fixing up and refactoring usage of indel categories. On a variant context, isInsertion() and isDeletion() are now removed because their previous behavior was wrong for multiallelic sites. Now, methods isSimpleInsertion() and isSimpleDeletion() will return true only if sites are biallelic. For multiallelic indel sites, isComplexIndel() will return true in all cases. VariantEval module CountVariants is corrected and an additional column is added so that we log mixed events and complex indels separately (before they were being conflated). VariantEval module IndelStatistics is considerably simplified as the sample stratification was wrong and redundant; it should now work with the VE-generic Sample stratification. Several columns are renamed or removed since they're not really useful. --- .../gatk/walkers/annotator/AlleleBalance.java | 2 +- .../walkers/annotator/HomopolymerRun.java | 2 +- .../gatk/walkers/annotator/IndelType.java | 4 +- .../fasta/FastaAlternateReferenceWalker.java | 4 +- .../gatk/walkers/indels/IndelRealigner.java | 6 +- .../indels/RealignerTargetCreator.java | 4 +- .../validation/ValidationAmplicons.java | 9 +- .../varianteval/evaluators/CountVariants.java | 18 +- .../evaluators/IndelLengthHistogram.java | 4 +- .../evaluators/IndelMetricsByAC.java | 221 ------------------ .../evaluators/IndelStatistics.java | 87 ++----- .../variantutils/LeftAlignVariants.java | 12 +- .../variantutils/ValidateVariants.java | 4 +- .../walkers/variantutils/VariantsToVCF.java | 4 +- .../sting/utils/IndelUtils.java | 4 +- .../utils/variantcontext/VariantContext.java | 14 +- .../VariantContextUnitTest.java | 18 +- 17 files changed, 75 insertions(+), 342 deletions(-) delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 6a2ffe189..cf68a9121 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -90,7 +90,7 @@ public class AlleleBalance extends InfoFieldAnnotation { } // todo -- actually care about indel length from the pileup (agnostic at the moment) int refCount = indelPileup.size(); - int altCount = vc.isInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions(); + int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions(); if ( refCount + altCount == 0 ) { continue; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 4102d811c..463f7a645 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -79,7 +79,7 @@ public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnot GenomeLoc locus = ref.getLocus(); GenomeLoc window = ref.getWindow(); int refBasePos = (int) (locus.getStart() - window.getStart())+1; - if ( vc.isDeletion() ) { + if ( vc.isSimpleDeletion() ) { // check that deleted bases are the same byte dBase = bases[refBasePos]; for ( int i = 0; i < vc.getReference().length(); i ++ ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index ff7f9a8f6..bfede40d2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -36,9 +36,9 @@ public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnota if (!vc.isBiallelic()) type = "MULTIALLELIC_INDEL"; else { - if (vc.isInsertion()) + if (vc.isSimpleInsertion()) type = "INS."; - else if (vc.isDeletion()) + else if (vc.isSimpleDeletion()) type = "DEL."; else type = "OTHER."; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index 8f333a2b3..fd912334f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -101,11 +101,11 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker { if ( vc.isFiltered() ) continue; - if ( vc.isDeletion()) { + if ( vc.isSimpleDeletion()) { deletionBasesRemaining = vc.getReference().length(); // delete the next n bases, not this one return new Pair(context.getLocation(), refBase); - } else if ( vc.isInsertion()) { + } else if ( vc.isSimpleInsertion()) { return new Pair(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString())); } else if (vc.isSNP()) { return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index d766ae8bd..129be7f55 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -877,7 +877,7 @@ public class IndelRealigner extends ReadWalker { for ( VariantContext knownIndel : knownIndelsToTry ) { if ( knownIndel == null || 
!knownIndel.isIndel() || knownIndel.isComplexIndel() ) continue; - byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length()); + byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length()); int start = knownIndel.getStart() - leftmostIndex + 1; Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel); if ( c != null ) @@ -1079,11 +1079,11 @@ public class IndelRealigner extends ReadWalker { if ( indexOnRef > 0 ) cigar.add(new CigarElement(indexOnRef, CigarOperator.M)); - if ( indel.isDeletion() ) { + if ( indel.isSimpleDeletion() ) { refIdx += indelStr.length; cigar.add(new CigarElement(indelStr.length, CigarOperator.D)); } - else if ( indel.isInsertion() ) { + else if ( indel.isSimpleInsertion() ) { for ( byte b : indelStr ) sb.append((char)b); cigar.add(new CigarElement(indelStr.length, CigarOperator.I)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 08ed1af52..48911b952 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -178,7 +178,7 @@ public class RealignerTargetCreator extends RodWalker { } else /* (mask != null && validate == null ) */ { if ( ! mask.isSNP() && ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphic() )) { logger.warn("Mask Variant Context on the following warning line is not a SNP. Currently we can only mask out SNPs. This probe will not be designed."); - logger.warn(String.format("%s:%d-%d\t%s\t%s",mask.getChr(),mask.getStart(),mask.getEnd(),mask.isInsertion() ? 
"INS" : "DEL", Utils.join(",",mask.getAlleles()))); + logger.warn(String.format("%s:%d-%d\t%s\t%s",mask.getChr(),mask.getStart(),mask.getEnd(),mask.isSimpleInsertion() ? "INS" : "DEL", Utils.join(",",mask.getAlleles()))); sequenceInvalid = true; - invReason.add(mask.isInsertion() ? "INSERTION" : "DELETION"); + invReason.add(mask.isSimpleInsertion() ? "INSERTION" : "DELETION"); // note: indelCounter could be > 0 (could have small deletion within larger one). This always selects // the larger event. - int indelCounterNew = mask.isInsertion() ? 2 : mask.getEnd()-mask.getStart(); + int indelCounterNew = mask.isSimpleInsertion() ? 2 : mask.getEnd()-mask.getStart(); if ( indelCounterNew > indelCounter ) { indelCounter = indelCounterNew; } //sequence.append((char) ref.getBase()); - //sequence.append(mask.isInsertion() ? 'I' : 'D'); + //sequence.append(mask.isSimpleInsertion() ? 'I' : 'D'); sequence.append("N"); indelCounter--; rawSequence.append(Character.toUpperCase((char) ref.getBase())); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index 87b8bac1d..b356a68dc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -39,8 +39,10 @@ public class CountVariants extends VariantEvaluator implements StandardEval { public long nInsertions = 0; @DataPoint(description = "Number of deletions") public long nDeletions = 0; - @DataPoint(description = "Number of complex loci") + @DataPoint(description = "Number of complex indels") public long nComplex = 0; + @DataPoint(description = "Number of mixed loci (loci that can't be classified as a SNP, Indel or MNP)") + public long nMixed = 0; @DataPoint(description = "Number of no calls loci") @@ -113,11 +115,15 @@ public class 
CountVariants extends VariantEvaluator implements StandardEval { if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++; break; case INDEL: - if (vc1.isInsertion()) nInsertions++; - else nDeletions++; + if (vc1.isSimpleInsertion()) + nInsertions++; + else if (vc1.isSimpleDeletion()) + nDeletions++; + else + nComplex++; break; case MIXED: - nComplex++; + nMixed++; break; default: throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType()); @@ -180,8 +186,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval { heterozygosity = perLocusRate(nHets); heterozygosityPerBp = perLocusRInverseRate(nHets); hetHomRatio = ratio(nHets, nHomVar); - indelRate = perLocusRate(nDeletions + nInsertions); - indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions); + indelRate = perLocusRate(nDeletions + nInsertions + nComplex); + indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions + nComplex); deletionInsertionRatio = ratio(nDeletions, nInsertions); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java index 77def0f30..35fffd815 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java @@ -96,9 +96,9 @@ public class IndelLengthHistogram extends VariantEvaluator { } if ( vc1.isIndel() ) { - if ( vc1.isInsertion() ) { + if ( vc1.isSimpleInsertion() ) { indelHistogram.update(vc1.getAlternateAllele(0).length()); - } else if ( vc1.isDeletion() ) { + } else if ( vc1.isSimpleDeletion() ) { indelHistogram.update(-vc1.getReference().length()); } else { throw new ReviewedStingException("Indel type that is not insertion or deletion."); diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java deleted file mode 100755 index 6e1b76acd..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelMetricsByAC.java +++ /dev/null @@ -1,221 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.ArrayList; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @author delangel - * @since Apr 11, 2010 - */ - -@Analysis(name = "Indel Metrics by allele count", description = "Shows various stats binned by allele count") -public class IndelMetricsByAC extends VariantEvaluator { - // a mapping from quality score histogram bin to Ti/Tv ratio - @DataPoint(description = "Indel Metrics by allele count") - IndelMetricsByAc metrics = null; - - int numSamples = 0; - - public void initialize(VariantEvalWalker walker) { - numSamples = walker.getNumSamples(); - } - - //@DataPoint(name="Quality by Allele Count", description = "average variant quality for each allele count") - //AlleleCountStats alleleCountStats = null; - private static final int INDEL_SIZE_LIMIT = 100; - private static final int NUM_SCALAR_COLUMNS = 6; - static int len2Index(int ind) { - return ind+INDEL_SIZE_LIMIT; - } - - static int index2len(int ind) { - return ind-INDEL_SIZE_LIMIT-NUM_SCALAR_COLUMNS; - } - - protected final static String[] METRIC_COLUMNS; - static { - METRIC_COLUMNS= new String[NUM_SCALAR_COLUMNS+2*INDEL_SIZE_LIMIT+1]; - METRIC_COLUMNS[0] = "AC"; - METRIC_COLUMNS[1] = "nIns"; - METRIC_COLUMNS[2] = "nDels"; - METRIC_COLUMNS[3] = "n"; - METRIC_COLUMNS[4] = "nComplex"; - METRIC_COLUMNS[5] = "nLong"; - - for (int k=NUM_SCALAR_COLUMNS; k < NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT+1; k++) - METRIC_COLUMNS[k] = "indel_size_len"+Integer.valueOf(index2len(k)); - } - - class IndelMetricsAtAC { - public int ac = -1, nIns =0, nDel = 0, nComplex = 0, nLong; - 
public int sizeCount[] = new int[2*INDEL_SIZE_LIMIT+1]; - - public IndelMetricsAtAC(int ac) { this.ac = ac; } - - public void update(VariantContext eval) { - int eventLength = 0; - if ( eval.isInsertion() ) { - eventLength = eval.getAlternateAllele(0).length(); - nIns++; - } else if ( eval.isDeletion() ) { - eventLength = -eval.getReference().length(); - nDel++; - } - else { - nComplex++; - } - if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) - sizeCount[len2Index(eventLength)]++; - else - nLong++; - - - - } - - // corresponding to METRIC_COLUMNS - public String getColumn(int i) { - if (i >= NUM_SCALAR_COLUMNS && i <=NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT) - return String.valueOf(sizeCount[i-NUM_SCALAR_COLUMNS]); - - switch (i) { - case 0: return String.valueOf(ac); - case 1: return String.valueOf(nIns); - case 2: return String.valueOf(nDel); - case 3: return String.valueOf(nIns + nDel); - case 4: return String.valueOf(nComplex); - case 5: return String.valueOf(nLong); - - default: - throw new ReviewedStingException("Unexpected column " + i); - } - } - } - - class IndelMetricsByAc implements TableType { - ArrayList metrics = new ArrayList(); - Object[] rows = null; - - public IndelMetricsByAc( int nchromosomes ) { - rows = new Object[nchromosomes+1]; - metrics = new ArrayList(nchromosomes+1); - for ( int i = 0; i < nchromosomes + 1; i++ ) { - metrics.add(new IndelMetricsAtAC(i)); - rows[i] = "ac" + i; - } - } - - public Object[] getRowKeys() { - return rows; - } - - public Object[] getColumnKeys() { - return METRIC_COLUMNS; - } - - public String getName() { - return "IndelMetricsByAc"; - } - - // - public String getCell(int ac, int y) { - return metrics.get(ac).getColumn(y); - } - - public String toString() { - return ""; - } - - public void incrValue( VariantContext eval ) { - int ac = -1; - - if ( eval.hasGenotypes() ) - ac = eval.getChromosomeCount(eval.getAlternateAllele(0)); - else if ( eval.hasAttribute("AC") ) { - ac = 
Integer.valueOf(eval.getAttributeAsString("AC")); - } - - if ( ac != -1 ) - metrics.get(ac).update(eval); - } - } - - //public IndelMetricsByAC(VariantEvalWalker parent) { - //super(parent); - // don't do anything - //} - - public String getName() { - return "IndelMetricsByAC"; - } - - public int getComparisonOrder() { - return 1; // we only need to see each eval track - } - - public boolean enabled() { - return true; - } - - public String toString() { - return getName(); - } - - public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - final String interesting = null; - - if (eval != null ) { - if ( metrics == null ) { - int nSamples = numSamples; - //int nSamples = 2; - if ( nSamples != -1 ) - metrics = new IndelMetricsByAc(2 * nSamples); - } - - if ( eval.isIndel() && eval.isBiallelic() && - metrics != null ) { - metrics.incrValue(eval); - } - } - - return interesting; // This module doesn't capture any interesting sites, so return null - } - - //public void finalizeEvaluation() { - // - //} -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java index d99196ecf..78683dfcb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java @@ -44,7 +44,7 @@ public class IndelStatistics extends VariantEvaluator { @DataPoint(description = "Indel Statistics") IndelStats indelStats = null; - @DataPoint(description = "Indel Classification") + // @DataPoint(description = "Indel Classification") IndelClasses indelClasses = null; int numSamples = 0; @@ -79,8 +79,7 @@ public class IndelStatistics extends VariantEvaluator { } static class IndelStats implements TableType { - protected final static String 
ALL_SAMPLES_KEY = "allSamples"; - protected final static String[] COLUMN_KEYS; + protected final static String[] COLUMN_KEYS; static { COLUMN_KEYS= new String[NUM_SCALAR_COLUMNS+2*INDEL_SIZE_LIMIT+1]; @@ -104,13 +103,10 @@ public class IndelStatistics extends VariantEvaluator { } // map of sample to statistics - protected final HashMap indelSummary = new HashMap(); + protected final int[] indelSummary; public IndelStats(final VariantContext vc) { - indelSummary.put(ALL_SAMPLES_KEY, new int[COLUMN_KEYS.length]); - for( final String sample : vc.getGenotypes().keySet() ) { - indelSummary.put(sample, new int[COLUMN_KEYS.length]); - } + indelSummary = new int[COLUMN_KEYS.length]; } /** @@ -118,18 +114,18 @@ public class IndelStatistics extends VariantEvaluator { * @return one row per sample */ public Object[] getRowKeys() { - return indelSummary.keySet().toArray(new String[indelSummary.size()]); + return new String[]{"all"}; } public Object getCell(int x, int y) { final Object[] rowKeys = getRowKeys(); if (y == IND_AT_CG_RATIO) { - int at = indelSummary.get(rowKeys[x])[IND_AT_EXP]; - int cg = indelSummary.get(rowKeys[x])[IND_CG_EXP]; + int at = indelSummary[IND_AT_EXP]; + int cg = indelSummary[IND_CG_EXP]; return String.format("%4.2f",((double)at) / (Math.max(cg, 1))); } else - return String.format("%d",indelSummary.get(rowKeys[x])[y]); + return String.format("%d",indelSummary[y]); } @@ -160,78 +156,31 @@ public class IndelStatistics extends VariantEvaluator { int eventLength = 0; boolean isInsertion = false, isDeletion = false; - if ( vc.isInsertion() ) { + if ( vc.isSimpleInsertion() ) { eventLength = vc.getAlternateAllele(0).length(); - indelSummary.get(ALL_SAMPLES_KEY)[IND_INS]++; + indelSummary[IND_INS]++; isInsertion = true; - } else if ( vc.isDeletion() ) { - indelSummary.get(ALL_SAMPLES_KEY)[IND_DEL]++; + } else if ( vc.isSimpleDeletion() ) { + indelSummary[IND_DEL]++; eventLength = -vc.getReference().length(); isDeletion = true; } else { - 
indelSummary.get(ALL_SAMPLES_KEY)[IND_COMPLEX]++; + indelSummary[IND_COMPLEX]++; } if (IndelUtils.isATExpansion(vc,ref)) - indelSummary.get(ALL_SAMPLES_KEY)[IND_AT_EXP]++; + indelSummary[IND_AT_EXP]++; if (IndelUtils.isCGExpansion(vc,ref)) - indelSummary.get(ALL_SAMPLES_KEY)[IND_CG_EXP]++; + indelSummary[IND_CG_EXP]++; // make sure event doesn't overstep array boundaries if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) { - indelSummary.get(ALL_SAMPLES_KEY)[len2Index(eventLength)]++; + indelSummary[len2Index(eventLength)]++; if (eventLength % 3 != 0) - indelSummary.get(ALL_SAMPLES_KEY)[IND_FRAMESHIFT]++; + indelSummary[IND_FRAMESHIFT]++; } else - indelSummary.get(ALL_SAMPLES_KEY)[IND_LONG]++; - - - for( final String sample : vc.getGenotypes().keySet() ) { - if ( indelSummary.containsKey(sample) ) { - Genotype g = vc.getGenotype(sample); - boolean isVariant = (g.isCalled() && !g.isHomRef()); - if (isVariant) { - // update ins/del count - if (isInsertion) { - indelSummary.get(sample)[IND_INS]++; - } - else if (isDeletion) - indelSummary.get(sample)[IND_DEL]++; - else - indelSummary.get(sample)[IND_COMPLEX]++; - - // update histogram - if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) { - indelSummary.get(sample)[len2Index(eventLength)]++; - if (eventLength % 3 != 0) - indelSummary.get(sample)[IND_FRAMESHIFT]++; - } - else - indelSummary.get(sample)[IND_LONG]++; - - if (g.isHet()) - if (isInsertion) - indelSummary.get(sample)[IND_HET_INS]++; - else if (isDeletion) - indelSummary.get(sample)[IND_HET_DEL]++; - else - if (isInsertion) - indelSummary.get(sample)[IND_HOM_INS]++; - else if (isDeletion) - indelSummary.get(sample)[IND_HOM_DEL]++; - - if (IndelUtils.isATExpansion(vc,ref)) - indelSummary.get(sample)[IND_AT_EXP]++; - if (IndelUtils.isCGExpansion(vc,ref)) - indelSummary.get(sample)[IND_CG_EXP]++; - - - } - else - indelSummary.get(sample)[IND_HOM_REF]++; - } - } + indelSummary[IND_LONG]++; } diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 9fae71e4e..c9f330db5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -133,7 +133,7 @@ public class LeftAlignVariants extends RodWalker { // get the indel length int indelLength; - if ( vc.isDeletion() ) + if ( vc.isSimpleDeletion() ) indelLength = vc.getReference().length(); else indelLength = vc.getAlternateAllele(0).length(); @@ -150,7 +150,7 @@ public class LeftAlignVariants extends RodWalker { // create a CIGAR string to represent the event ArrayList elements = new ArrayList(); elements.add(new CigarElement(originalIndex, CigarOperator.M)); - elements.add(new CigarElement(indelLength, vc.isDeletion() ? CigarOperator.D : CigarOperator.I)); + elements.add(new CigarElement(indelLength, vc.isSimpleDeletion() ? CigarOperator.D : CigarOperator.I)); elements.add(new CigarElement(refSeq.length - originalIndex, CigarOperator.M)); Cigar originalCigar = new Cigar(elements); @@ -165,8 +165,8 @@ public class LeftAlignVariants extends RodWalker { int indelIndex = originalIndex-difference; byte[] newBases = new byte[indelLength]; - System.arraycopy((vc.isDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength); - Allele newAllele = Allele.create(newBases, vc.isDeletion()); + System.arraycopy((vc.isSimpleDeletion() ? 
refSeq : originalIndel), indelIndex, newBases, 0, indelLength); + Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion()); newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]); writer.add(newVC); @@ -178,14 +178,14 @@ public class LeftAlignVariants extends RodWalker { } private static byte[] makeHaplotype(VariantContext vc, byte[] ref, int indexOfRef, int indelLength) { - byte[] hap = new byte[ref.length + (indelLength * (vc.isDeletion() ? -1 : 1))]; + byte[] hap = new byte[ref.length + (indelLength * (vc.isSimpleDeletion() ? -1 : 1))]; // add the bases before the indel System.arraycopy(ref, 0, hap, 0, indexOfRef); int currentPos = indexOfRef; // take care of the indel - if ( vc.isDeletion() ) { + if ( vc.isSimpleDeletion() ) { indexOfRef += indelLength; } else { System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 01a6e2f70..c0f695966 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -137,11 +137,11 @@ public class ValidateVariants extends RodWalker { Allele reportedRefAllele = vc.getReference(); Allele observedRefAllele; // insertions - if ( vc.isInsertion() ) { + if ( vc.isSimpleInsertion() ) { observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING); } // deletions - else if ( vc.isDeletion() || vc.isMixed() || vc.isMNP() ) { + else if ( vc.isSimpleDeletion() || vc.isMixed() || vc.isMNP() ) { // we can't validate arbitrarily long deletions if ( reportedRefAllele.length() > 100 ) { logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart())); diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 61851abe2..b41ce394f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -168,8 +168,8 @@ public class VariantsToVCF extends RodWalker { continue; Map alleleMap = new HashMap(2); - alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isInsertion())); - alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isInsertion())); + alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion())); + alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); hapmap.setActualAlleles(alleleMap); // also, use the correct positioning for insertions diff --git a/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java b/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java index af69ebca6..74f147127 100755 --- a/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java @@ -121,9 +121,9 @@ public class IndelUtils { boolean done = false; ArrayList inds = new ArrayList(); - if ( vc.isInsertion() ) { + if ( vc.isSimpleInsertion() ) { indelAlleleString = vc.getAlternateAllele(0).getDisplayString(); - } else if ( vc.isDeletion() ) { + } else if ( vc.isSimpleDeletion() ) { indelAlleleString = vc.getReference().getDisplayString(); } else { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index ca3399c78..d953085ab 100755 --- 
a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -583,24 +583,24 @@ public class VariantContext implements Feature { // to enable tribble intergrati /** * @return true if the alleles indicate a simple insertion (i.e., the reference allele is Null) */ - public boolean isInsertion() { - // can't just call !isDeletion() because of complex indels - return getType() == Type.INDEL && getReference().isNull(); + public boolean isSimpleInsertion() { + // can't just call !isSimpleDeletion() because of complex indels + return getType() == Type.INDEL && getReference().isNull() && isBiallelic(); } /** * @return true if the alleles indicate a simple deletion (i.e., a single alt allele that is Null) */ - public boolean isDeletion() { - // can't just call !isInsertion() because of complex indels - return getType() == Type.INDEL && getAlternateAllele(0).isNull(); + public boolean isSimpleDeletion() { + // can't just call !isSimpleInsertion() because of complex indels + return getType() == Type.INDEL && getAlternateAllele(0).isNull() && isBiallelic(); } /** * @return true if the alleles indicate neither a simple deletion nor a simple insertion */ public boolean isComplexIndel() { - return isIndel() && !isDeletion() && !isInsertion(); + return isIndel() && !isSimpleDeletion() && !isSimpleInsertion(); } public boolean isSymbolic() { diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index d8fa0eae4..f8e6da20a 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -146,8 +146,8 @@ public class VariantContextUnitTest { Assert.assertEquals(vc.getType(), 
VariantContext.Type.SNP); Assert.assertTrue(vc.isSNP()); Assert.assertFalse(vc.isIndel()); - Assert.assertFalse(vc.isInsertion()); - Assert.assertFalse(vc.isDeletion()); + Assert.assertFalse(vc.isSimpleInsertion()); + Assert.assertFalse(vc.isSimpleDeletion()); Assert.assertFalse(vc.isMixed()); Assert.assertTrue(vc.isBiallelic()); Assert.assertEquals(vc.getNAlleles(), 2); @@ -173,8 +173,8 @@ public class VariantContextUnitTest { Assert.assertEquals(VariantContext.Type.NO_VARIATION, vc.getType()); Assert.assertFalse(vc.isSNP()); Assert.assertFalse(vc.isIndel()); - Assert.assertFalse(vc.isInsertion()); - Assert.assertFalse(vc.isDeletion()); + Assert.assertFalse(vc.isSimpleInsertion()); + Assert.assertFalse(vc.isSimpleDeletion()); Assert.assertFalse(vc.isMixed()); Assert.assertFalse(vc.isBiallelic()); Assert.assertEquals(vc.getNAlleles(), 1); @@ -199,8 +199,8 @@ public class VariantContextUnitTest { Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL); Assert.assertFalse(vc.isSNP()); Assert.assertTrue(vc.isIndel()); - Assert.assertFalse(vc.isInsertion()); - Assert.assertTrue(vc.isDeletion()); + Assert.assertFalse(vc.isSimpleInsertion()); + Assert.assertTrue(vc.isSimpleDeletion()); Assert.assertFalse(vc.isMixed()); Assert.assertTrue(vc.isBiallelic()); Assert.assertEquals(vc.getNAlleles(), 2); @@ -226,8 +226,8 @@ public class VariantContextUnitTest { Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL); Assert.assertFalse(vc.isSNP()); Assert.assertTrue(vc.isIndel()); - Assert.assertTrue(vc.isInsertion()); - Assert.assertFalse(vc.isDeletion()); + Assert.assertTrue(vc.isSimpleInsertion()); + Assert.assertFalse(vc.isSimpleDeletion()); Assert.assertFalse(vc.isMixed()); Assert.assertTrue(vc.isBiallelic()); Assert.assertEquals(vc.getNAlleles(), 2); @@ -433,7 +433,7 @@ public class VariantContextUnitTest { Assert.assertFalse(vc14.isBiallelic()); Assert.assertTrue(vc5.isIndel()); - Assert.assertTrue(vc5.isDeletion()); + Assert.assertTrue(vc5.isSimpleDeletion()); 
Assert.assertTrue(vc5.isVariant()); Assert.assertTrue(vc5.isBiallelic()); From 626cbf94118e2da51649dfc445670d10f4703852 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 18 Aug 2011 16:28:40 -0400 Subject: [PATCH 02/11] Bug fixes and cleanups for IndelStatistics --- .../evaluators/IndelStatistics.java | 70 ++++++------------- 1 file changed, 23 insertions(+), 47 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java index 78683dfcb..fc347339d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java @@ -57,13 +57,13 @@ public class IndelStatistics extends VariantEvaluator { private static final int IND_HET = 0; private static final int IND_INS = 1; private static final int IND_DEL = 2; - private static final int IND_AT_CG_RATIO = 3; + private static final int IND_COMPLEX = 3; private static final int IND_HET_INS = 4; private static final int IND_HOM_INS = 5; private static final int IND_HET_DEL = 6; private static final int IND_HOM_DEL = 7; private static final int IND_HOM_REF = 8; - private static final int IND_COMPLEX = 9; + private static final int IND_MIXED = 9; private static final int IND_LONG = 10; private static final int IND_AT_EXP = 11; private static final int IND_CG_EXP = 12; @@ -86,7 +86,7 @@ public class IndelStatistics extends VariantEvaluator { COLUMN_KEYS[0] = "heterozygosity"; COLUMN_KEYS[1] = "insertions"; COLUMN_KEYS[2] = "deletions"; - COLUMN_KEYS[3] = "AT_CG_expansion_ratio"; + COLUMN_KEYS[3] = "complex"; COLUMN_KEYS[4] = "het_insertions"; COLUMN_KEYS[5] = "homozygous_insertions"; COLUMN_KEYS[6] = "het_deletions"; @@ -117,16 +117,7 @@ public class IndelStatistics extends VariantEvaluator { return new 
String[]{"all"}; } public Object getCell(int x, int y) { - final Object[] rowKeys = getRowKeys(); - if (y == IND_AT_CG_RATIO) { - - int at = indelSummary[IND_AT_EXP]; - int cg = indelSummary[IND_CG_EXP]; - return String.format("%4.2f",((double)at) / (Math.max(cg, 1))); - } - else - return String.format("%d",indelSummary[y]); - + return String.format("%d",indelSummary[y]); } /** @@ -165,40 +156,40 @@ public class IndelStatistics extends VariantEvaluator { eventLength = -vc.getReference().length(); isDeletion = true; } - else { + else if (vc.isComplexIndel()) { indelSummary[IND_COMPLEX]++; } + else if (vc.isMixed()) + indelSummary[IND_MIXED]++; + if (IndelUtils.isATExpansion(vc,ref)) indelSummary[IND_AT_EXP]++; if (IndelUtils.isCGExpansion(vc,ref)) indelSummary[IND_CG_EXP]++; // make sure event doesn't overstep array boundaries - if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) { - indelSummary[len2Index(eventLength)]++; - if (eventLength % 3 != 0) - indelSummary[IND_FRAMESHIFT]++; + if (vc.isSimpleDeletion() || vc.isSimpleInsertion()) { + if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) { + indelSummary[len2Index(eventLength)]++; + if (eventLength % 3 != 0) + indelSummary[IND_FRAMESHIFT]++; + } + else + indelSummary[IND_LONG]++; } - else - indelSummary[IND_LONG]++; - } } static class IndelClasses implements TableType { - protected final static String ALL_SAMPLES_KEY = "allSamples"; protected final static String[] columnNames = IndelUtils.getIndelClassificationNames(); // map of sample to statistics - protected final HashMap indelClassSummary = new HashMap(); + protected final int[] indelClassSummary; public IndelClasses(final VariantContext vc) { - indelClassSummary.put(ALL_SAMPLES_KEY, new int[columnNames.length]); - for( final String sample : vc.getGenotypes().keySet() ) { - indelClassSummary.put(sample, new int[columnNames.length]); - } + indelClassSummary = new int[columnNames.length]; } /** @@ -206,11 +197,10 @@ public class IndelStatistics extends VariantEvaluator { 
* @return one row per sample */ public Object[] getRowKeys() { - return indelClassSummary.keySet().toArray(new String[indelClassSummary.size()]); + return new String[]{"all"}; } public Object getCell(int x, int y) { - final Object[] rowKeys = getRowKeys(); - return String.format("%d",indelClassSummary.get(rowKeys[x])[y]); + return String.format("%d",indelClassSummary[y]); } /** @@ -234,18 +224,7 @@ public class IndelStatistics extends VariantEvaluator { } private void incrementSampleStat(VariantContext vc, int index) { - indelClassSummary.get(ALL_SAMPLES_KEY)[index]++; - for( final String sample : vc.getGenotypes().keySet() ) { - if ( indelClassSummary.containsKey(sample) ) { - Genotype g = vc.getGenotype(sample); - boolean isVariant = (g.isCalled() && !g.isHomRef()); - if (isVariant) - // update count - indelClassSummary.get(sample)[index]++; - - } - } - + indelClassSummary[index]++; } /* * increment the specified value @@ -293,16 +272,13 @@ public class IndelStatistics extends VariantEvaluator { if (eval != null ) { if ( indelStats == null ) { - int nSamples = numSamples; - - if ( nSamples != -1 ) - indelStats = new IndelStats(eval); + indelStats = new IndelStats(eval); } if ( indelClasses == null ) { indelClasses = new IndelClasses(eval); } - if ( eval.isIndel() && eval.isBiallelic() ) { + if ( eval.isIndel() || eval.isMixed() ) { if (indelStats != null ) indelStats.incrValue(eval, ref); From 782453235a808acf04ea37298a2e6ebd911c6f9a Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Sat, 20 Aug 2011 12:24:22 -0400 Subject: [PATCH 06/11] Updated VariantEvalIntegrationTest since there's a new column separating nMixed and nComplex in CountVariants Misc updates to WholeGenomeIndelCalling.scala Bug fix in VariantEval (may be temporary, need more investigation): if -disc option is used in sites-only vcf's then a null pointer exception is produced, caused by recent introduction of -xl_sf options. 
--- .../walkers/variantutils/SelectVariants.java | 12 ++++---- .../VariantEvalIntegrationTest.java | 30 +++++++++---------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index f6b6a8d65..93bc9e518 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -344,11 +344,13 @@ public class SelectVariants extends RodWalker { } // now, exclude any requested samples - Collection XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles); - samples.removeAll(XLsamplesFromFile); - samples.removeAll(XLsampleNames); - - if ( samples.size() == 0 ) + if (XLsampleFiles != null) + if(!XLsampleFiles.isEmpty()) { + Collection XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles); + samples.removeAll(XLsamplesFromFile); + samples.removeAll(XLsampleNames); + } + if ( samples.size() == 0 && !NO_SAMPLES_SPECIFIED ) throw new UserException("All samples requested to be included were also requested to be excluded."); for ( String sample : samples ) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 1de9a72d8..3503a2353 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -29,7 +29,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("bced1842c78fbabb089dd12b7087050d") + Arrays.asList("1fefd6cf9c2554d5f886c3998defd4d0") ); 
executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); } @@ -50,7 +50,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("06510bd37ffaa39e817ca0dcaf8f8ac2") + Arrays.asList("d470e00a368b5a0468012818994c6a89") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -72,7 +72,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("19c5b1b6396921c5b1059a2849ae4fcc") + Arrays.asList("12856e52c2682328f91594089328596c") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } @@ -93,7 +93,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("a71f8d81cf166cd97ac628092650964a") + Arrays.asList("91610b9240f64e0eb03cfd2602cf57af") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); } @@ -114,7 +114,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("4dabe0658232f6174188515db6dfe112") + Arrays.asList("e40b77e7ed6581328e373a24b93cd170") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); } @@ -135,7 +135,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("3340587f10ceff83e5567ddfd1a9a60e") + Arrays.asList("15beaf3823c131cabc5fb0445239f978") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); } @@ -156,7 +156,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("c730c7ee31c8138cef6efd8dd04fbbfc") + Arrays.asList("7ddd4ee74938d229ce5cb7b9b9104abe") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); } @@ -179,7 +179,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("2559ca8f454b03e81561f6947f79df18") + Arrays.asList("a90f33906a732ef5eb346e559c96ccc1") ); 
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); } @@ -204,7 +204,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("23aa5f97641d2fd033095f21c51d2f37") + Arrays.asList("2567f90d3d7354850c5a59730ecc6e4f") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); } @@ -223,7 +223,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("a69dd3f06903b3f374c6d6f010c653e0") + Arrays.asList("fa091aa8967893389c51102fd9f0bebb") ); executeTest("testFundamentalsCountVariantsNoCompRod", spec); } @@ -236,7 +236,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("125fe0a04b5d933cc14016598b2791cd")); + 1, Arrays.asList("2df4f8911ffc3c8d042298723ed465f8")); executeTestParallel("testSelect1", spec); } @@ -253,7 +253,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d1932be3748fcf6da77dc51aec323710")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ed54aa127b173d8ad8b6482f2a929a42")); executeTestParallel("testCompVsEvalAC",spec); } @@ -283,7 +283,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --dbsnp " + b37dbSNP132 + " --eval:evalBI " + 
validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("cbea5f9f8c046d4c014d261db352c43b")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("18c44636e36d6657110bf984f8eac181")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -295,7 +295,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d07a246963ae609643620c839b20cd1e")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("1b8ae4fd10de0888bd843f833859d990")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -373,7 +373,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("44464fe7c89a56cf128a932ef640f7da") + Arrays.asList("da65fc8f0d0eeaf0a0b06a07f444bb8e") ); executeTest("testAlleleCountStrat", spec); } From 0ccd1739676b0cf8210df2fc07bff30684390183 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sat, 20 Aug 2011 21:30:08 -0400 Subject: [PATCH 07/11] Fixing the recent SelectVariants fix --- .../gatk/walkers/variantutils/SelectVariants.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 93bc9e518..bfe7198cf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -209,7 +209,7 @@ public class SelectVariants extends RodWalker { * Note that 
sample exclusion takes precedence over inclusion, so that if a sample is in both lists it will be excluded. */ @Argument(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false) - public Set XLsampleFiles; + public Set XLsampleFiles = new HashSet(0); /** * Note that these expressions are evaluated *after* the specified samples are extracted and the INFO field annotations are updated. @@ -344,12 +344,10 @@ public class SelectVariants extends RodWalker { } // now, exclude any requested samples - if (XLsampleFiles != null) - if(!XLsampleFiles.isEmpty()) { - Collection XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles); - samples.removeAll(XLsamplesFromFile); - samples.removeAll(XLsampleNames); - } + Collection XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles); + samples.removeAll(XLsamplesFromFile); + samples.removeAll(XLsampleNames); + if ( samples.size() == 0 && !NO_SAMPLES_SPECIFIED ) throw new UserException("All samples requested to be included were also requested to be excluded."); From a8cbced71bdf9b0e4e97def7c7b5c26520865ab5 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sat, 20 Aug 2011 22:49:51 -0400 Subject: [PATCH 08/11] Bug fix for Ryan: check for no context --- .../sting/gatk/walkers/varianteval/VariantEvalWalker.java | 3 ++- .../sting/gatk/walkers/varianteval/util/VariantEvalUtils.java | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index f6d42afb1..613a31ed3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -311,7 +311,8 @@ public class VariantEvalWalker extends 
RodWalker implements Tr // for each comp track for ( final RodBinding compRod : comps ) { // no sample stratification for comps - final Set compSet = compVCs.get(compRod) == null ? new HashSet(0) : compVCs.get(compRod).values().iterator().next(); + final HashMap> compSetHash = compVCs.get(compRod); + final Set compSet = (compSetHash == null || compSetHash.size() == 0) ? new HashSet(0) : compVCs.get(compRod).values().iterator().next(); // find the comp final VariantContext comp = findMatchingComp(eval, compSet); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index ed0e8d7f6..f31dd9f9f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -347,9 +347,9 @@ public class VariantEvalUtils { } } } - - bindings.put(track, mapping); } + + bindings.put(track, mapping); } return bindings; From 22ca44c015382c67423824d4a1a12a4bb02aa9ea Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Sun, 21 Aug 2011 02:34:20 -0400 Subject: [PATCH 11/11] Fixed Queue's tagging of RodBindings. Fixed argument definition names. 
--- .../gatk/ArgumentDefinitionField.java | 83 ++++++++++++------- 1 file changed, 55 insertions(+), 28 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index c09c4037e..cb5bad4ae 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -144,6 +144,9 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, BAMIndex.BAMIndexSuffix, ".bam")); + } else if ((RodBinding.class.equals(argumentDefinition.argumentType) || RodBinding.class.equals(argumentDefinition.componentType)) && argumentDefinition.ioType == ArgumentIOType.INPUT) { + return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, Tribble.STANDARD_INDEX_EXTENSION)); + } else if (argumentDefinition.ioType == ArgumentIOType.INPUT) { return Collections.singletonList(new InputArgumentField(argumentDefinition)); @@ -196,7 +199,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } // if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) - // Change intervals exclusize of intervalsString. + // Change intervals exclusive of intervalsString. 
private static class IntervalFileArgumentField extends InputArgumentField { public IntervalFileArgumentField(ArgumentDefinition argumentDefinition) { super(argumentDefinition); @@ -332,9 +335,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } } - /** - * The other extreme of a NamedRodBindingField, allows the user to specify the track name, track type, and the file. - */ + // Allows the user to specify the track name, track type, and the file. public static class RodBindArgumentField extends ArgumentDefinitionField { public RodBindArgumentField(ArgumentDefinition argumentDefinition) { super(argumentDefinition); @@ -347,25 +348,28 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } } - /** - * Named input_files. - */ + // Tagged input_files or other rods. public static class InputTaggedFileDefinitionField extends ArgumentDefinitionField { public InputTaggedFileDefinitionField(ArgumentDefinition argumentDefinition) { super(argumentDefinition); } @Override protected Class getInnerType() { return null; } // TaggedFile does not need to be imported. - @Override protected String getFieldType() { return "List[File]"; } - @Override protected String getDefaultValue() { return "Nil"; } + @Override protected String getFieldType() { return argumentDefinition.isMultiValued ? "List[File]" : "File"; } + @Override protected String getDefaultValue() { return argumentDefinition.isMultiValued ? 
"Nil" : "_"; } @Override protected String getCommandLineTemplate() { - return " + repeat(\"\", %3$s, format=TaggedFile.formatCommandLine(\"%1$s\"))"; + if (argumentDefinition.isMultiValued) { + return " + repeat(\"\", %3$s, format=TaggedFile.formatCommandLine(\"%1$s\"))"; + } else if (!argumentDefinition.required) { + return " + optional(\"\", %3$s, format=TaggedFile.formatCommandLine(\"%1$s\"))"; + } else { + return " + TaggedFile.formatCommandLine(\"%1$s\")(\"\", %3$s, \"\")"; + } } } - /** - * Adds optional inputs for the indexes of any bams or sams added to this function. - */ + // Adds optional inputs for the indexes of any rods added to this function. private static class InputIndexesArgumentField extends ArgumentField { + private final boolean originalIsMultiValued; private final String indexFieldName; private final String originalFieldName; private final String indexSuffix; @@ -374,14 +378,19 @@ public abstract class ArgumentDefinitionField extends ArgumentField { this(originalArgumentDefinition, indexSuffix, null); } public InputIndexesArgumentField(ArgumentDefinition originalArgumentDefinition, String indexSuffix, String originalSuffix) { - this.indexFieldName = originalArgumentDefinition.fullName + "Indexes"; + this.originalIsMultiValued = originalArgumentDefinition.isMultiValued; + this.indexFieldName = originalArgumentDefinition.fullName + "Index" + (originalIsMultiValued ? "es" : ""); this.originalFieldName = originalArgumentDefinition.fullName; this.indexSuffix = indexSuffix; this.originalSuffix = originalSuffix; } @Override protected Class getAnnotationIOClass() { return Input.class; } @Override public String getCommandLineAddition() { return ""; } - @Override protected String getDoc() { return "Dependencies on any indexes of " + this.originalFieldName; } + @Override protected String getDoc() { + return originalIsMultiValued + ? 
"Dependencies on any indexes of " + this.originalFieldName + : "Dependencies on the index of " + this.originalFieldName; + } @Override protected String getFullName() { return this.indexFieldName; } @Override protected boolean isRequired() { return false; } @Override protected String getFieldType() { return "List[File]"; } @@ -389,24 +398,41 @@ public abstract class ArgumentDefinitionField extends ArgumentField { @Override protected Class getInnerType() { return File.class; } @Override protected String getRawFieldName() { return this.indexFieldName; } @Override protected String getFreezeFields() { - if (originalSuffix == null) { - return String.format( - ("%1$s ++= %2$s" + - ".filter(orig => orig != null)" + - ".map(orig => new File(orig.getPath + \"%3$s\"))%n"), - indexFieldName, originalFieldName, indexSuffix); + if (originalIsMultiValued) { + if (originalSuffix == null) { + return String.format( + ("%1$s ++= %2$s" + + ".filter(orig => orig != null)" + + ".map(orig => new File(orig.getPath + \"%3$s\"))%n"), + indexFieldName, originalFieldName, indexSuffix); + } else { + return String.format( + ("%1$s ++= %2$s" + + ".filter(orig => orig != null && orig.getName.endsWith(\"%4$s\"))" + + ".flatMap(orig => Array(" + + " new File(orig.getPath + \"%3$s\")," + + " new File(orig.getPath.stripSuffix(\"%4$s\") + \"%3$s\") ))%n"), + indexFieldName, originalFieldName, indexSuffix, originalSuffix); + } } else { - return String.format( - ("%1$s ++= %2$s" + - ".filter(orig => orig != null && orig.getName.endsWith(\"%4$s\"))" + - ".flatMap(orig => Array(" + - " new File(orig.getPath + \"%3$s\")," + - " new File(orig.getPath.stripSuffix(\"%4$s\") + \"%3$s\") ))%n"), - indexFieldName, originalFieldName, indexSuffix, originalSuffix); + if (originalSuffix == null) { + return String.format( + ("if (%2$s != null)%n " + + "%1$s :+= new File(%2$s.getPath + \"%3$s\")%n"), + indexFieldName, originalFieldName, indexSuffix); + } else { + return String.format( + ("if (%2$s != null && 
%2$s.getName.endsWith(\"%4$s\"))%n " + + "%1$s ++= Array(" + + " new File(%2$s.getPath + \"%3$s\")," + + " new File(%2$s.getPath.stripSuffix(\"%4$s\") + \"%3$s\") )%n"), + indexFieldName, originalFieldName, indexSuffix, originalSuffix); + } } } } + // Tracks an automatically generated index private static abstract class OutputIndexArgumentField extends ArgumentField { protected final String indexFieldName; protected final String originalFieldName; @@ -456,6 +482,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } } + // Allows setting the format for floats and doubles private static class FormatterArgumentField extends ArgumentField { private final ArgumentField argumentField; public FormatterArgumentField(ArgumentField argumentField) {