diff --git a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java index 86ca6c2df..f920d90ef 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java @@ -92,7 +92,10 @@ public final class IntervalBinding { codec.readHeader(lineReader); String line = lineReader.readLine(); while ( line != null ) { - intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(codec.decodeLoc(line))); + final Feature feature = codec.decodeLoc(line); + if ( feature == null ) + throw new UserException.MalformedFile(featureIntervals.getSource(), "Couldn't parse line '" + line + "'"); + intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(feature)); line = lineReader.readLine(); } } catch (IOException e) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java index 46d28185b..37a6cfc36 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java @@ -129,8 +129,14 @@ public class MalformedReadFilter extends ReadFilter { * @return true if they have the same number. False otherwise. */ private static boolean checkMismatchingBasesAndQuals(SAMRecord read, boolean filterMismatchingBaseAndQuals) { - if (!filterMismatchingBaseAndQuals) - throw new UserException.MalformedBAM(read, "BAM file has a read with mismatching number of bases and base qualities. Offender: " + read.getReadName() +" [" + read.getReadLength() + " bases] [" +read.getBaseQualities().length +"] quals"); - return (read.getReadLength() == read.getBaseQualities().length); + boolean result; + if (read.getReadLength() == read.getBaseQualities().length) + result = true; + else if (filterMismatchingBaseAndQuals) + result = false; + else + throw new UserException.MalformedBAM(read, String.format("BAM file has a read with mismatching number of bases and base qualities. Offender: %s [%d bases] [%d quals]", read.getReadName(), read.getReadLength(), read.getBaseQualities().length)); + + return result; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 28f4f2a56..4e4a1550d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -15,11 +16,14 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; +import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.IntervalStratification; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; @@ -143,10 +147,10 @@ public class VariantEvalWalker extends RodWalker implements Tr /** * See the -list argument to view available modules. */ - @Argument(fullName="evalModule", shortName="EV", doc="One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noE is specified)", required=false) + @Argument(fullName="evalModule", shortName="EV", doc="One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noEV is specified)", required=false) protected String[] MODULES_TO_USE = {}; - @Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -E option)", required=false) + @Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -EV option)", required=false) protected Boolean NO_STANDARD_MODULES = false; // Other arguments @@ -171,6 +175,19 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp and eval tracks with exactly matching reference and alternate alleles will be counted as overlapping", required=false) private boolean requireStrictAlleleMatch = false; + /** + * If true, VariantEval will treat -eval 1 -eval 2 as separate tracks from the same underlying + * variant set, and evaluate the union of the results. Useful when you want to do -eval chr1.vcf -eval chr2.vcf etc. + */ + @Argument(fullName="mergeEvals", shortName="mergeEvals", doc="If provided, all -eval tracks will be merged into a single eval track", required=false) + public boolean mergeEvals = false; + + /** + * File containing tribble-readable features for the IntervalStratificiation + */ + @Input(fullName="stratIntervals", shortName="stratIntervals", doc="File containing tribble-readable features for the IntervalStratificiation", required=false) + protected IntervalBinding intervalsFile = null; + // Variables private Set jexlExpressions = new TreeSet(); @@ -224,13 +241,8 @@ public class VariantEvalWalker extends RodWalker implements Tr knowns.add(compRod); } - // Collect the eval rod names - Set evalNames = new TreeSet(); - for ( RodBinding evalRod : evals ) - evalNames.add(evalRod.getName()); - // Now that we have all the rods categorized, determine the sample list from the eval rods. - Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evalNames); + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evals); Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); // Load the sample list @@ -258,6 +270,16 @@ public class VariantEvalWalker extends RodWalker implements Tr perSampleIsEnabled = true; } + if ( intervalsFile != null ) { + boolean fail = true; + for ( final VariantStratifier vs : stratificationObjects ) { + if ( vs.getClass().equals(IntervalStratification.class) ) + fail = false; + } + if ( fail ) + throw new UserException.BadArgumentValue("ST", "stratIntervals argument provided but -ST IntervalStratification not provided"); + } + // Initialize the evaluation contexts evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null); @@ -289,8 +311,8 @@ public class VariantEvalWalker extends RodWalker implements Tr String aastr = (ancestralAlignments == null) ? null : new String(ancestralAlignments.getSubsequenceAt(ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop()).getBases()); // --------- track --------- sample - VariantContexts - - HashMap, HashMap>> evalVCs = variantEvalUtils.bindVariantContexts(tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled); - HashMap, HashMap>> compVCs = variantEvalUtils.bindVariantContexts(tracker, ref, comps, byFilterIsEnabled, false, false); + HashMap, HashMap>> evalVCs = variantEvalUtils.bindVariantContexts(tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled, mergeEvals); + HashMap, HashMap>> compVCs = variantEvalUtils.bindVariantContexts(tracker, ref, comps, byFilterIsEnabled, false, false, false); // for each eval track for ( final RodBinding evalRod : evals ) { @@ -353,6 +375,8 @@ public class VariantEvalWalker extends RodWalker implements Tr } } } + + if ( mergeEvals ) break; // stop processing the eval tracks } } @@ -528,6 +552,14 @@ public class VariantEvalWalker extends RodWalker implements Tr public Set getJexlExpressions() { return jexlExpressions; } + public List getIntervals() { + if ( intervalsFile == null ) + throw new UserException.MissingArgument("stratIntervals", "Must be provided when IntervalStratification is enabled"); + + return intervalsFile.getIntervals(getToolkit()); + } + + public Set getContigNames() { final TreeSet contigs = new TreeSet(); for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) { @@ -536,4 +568,7 @@ public class VariantEvalWalker extends RodWalker implements Tr return contigs; } + public GenomeLocParser getGenomeLocParser() { + return getToolkit().getGenomeLocParser(); + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index e83434037..cba2781d8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -41,6 +41,9 @@ public class CountVariants extends VariantEvaluator implements StandardEval { public long nDeletions = 0; @DataPoint(description = "Number of complex indels") public long nComplex = 0; + @DataPoint(description = "Number of symbolic events") + public long nSymbolic = 0; + @DataPoint(description = "Number of mixed loci (loci that can't be classified as a SNP, Indel or MNP)") public long nMixed = 0; @@ -131,8 +134,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval { nMixed++; break; case SYMBOLIC: - // ignore symbolic alleles, but don't fail - // todo - consistent way of treating symbolic alleles thgoughout codebase? + nSymbolic++; break; default: throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/G1KPhaseITable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/G1KPhaseITable.java new file mode 100644 index 000000000..3ab618496 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/G1KPhaseITable.java @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.EnumMap; +import java.util.HashMap; +import java.util.Map; + +@Analysis(description = "Build 1000 Genome Phase I paper summary of variants table") +public class G1KPhaseITable extends VariantEvaluator { + // basic counts on various rates found + @DataPoint(description = "Number of samples") + public long nSamples = 0; + + @DataPoint(description = "Number of processed loci") + public long nProcessedLoci = 0; + + @DataPoint(description = "Number of SNPs") + public long nSNPs = 0; + @DataPoint(description = "SNP Novelty Rate") + public double SNPNoveltyRate = 0; + @DataPoint(description = "Mean number of SNPs per individual") + public long nSNPsPerSample = 0; + + @DataPoint(description = "Number of Indels") + public long nIndels = 0; + @DataPoint(description = "Indel Novelty Rate") + public double IndelNoveltyRate = 0; + @DataPoint(description = "Mean number of Indels per individual") + public long nIndelsPerSample = 0; + + @DataPoint(description = "Number of SVs") + public long nSVs = 0; + @DataPoint(description = "SV Novelty Rate") + public double SVNoveltyRate = 0; + @DataPoint(description = "Mean number of SVs per individual") + public long nSVsPerSample = 0; + + Map allVariantCounts, knownVariantCounts; + Map> countsPerSample; + + private final Map makeCounts() { + Map counts = new EnumMap(VariantContext.Type.class); + counts.put(VariantContext.Type.SNP, 0); + counts.put(VariantContext.Type.INDEL, 0); + counts.put(VariantContext.Type.SYMBOLIC, 0); + return counts; + } + + public void initialize(VariantEvalWalker walker) { + countsPerSample = new HashMap>(); + nSamples = walker.getSampleNamesForEvaluation().size(); + + for ( String sample : walker.getSampleNamesForEvaluation() ) { + countsPerSample.put(sample, makeCounts()); + } + + allVariantCounts = makeCounts(); + knownVariantCounts = makeCounts(); + } + + @Override public boolean enabled() { return true; } + + public int getComparisonOrder() { + return 2; // we only need to see each eval track + } + + public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); + } + + public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( eval == null || eval.isMonomorphic() ) return null; + + switch (eval.getType()) { +// case NO_VARIATION: +// // shouldn't get here +// break; + case SNP: + case INDEL: + case SYMBOLIC: + allVariantCounts.put(eval.getType(), allVariantCounts.get(eval.getType()) + 1); + if ( comp != null ) + knownVariantCounts.put(eval.getType(), knownVariantCounts.get(eval.getType()) + 1); + break; + default: + throw new UserException.BadInput("Unexpected variant context type: " + eval); + } + + // count variants per sample + for (final Genotype g : eval.getGenotypes().values()) { + if ( ! g.isNoCall() && ! g.isHomRef() ) { + int count = countsPerSample.get(g.getSampleName()).get(eval.getType()); + countsPerSample.get(g.getSampleName()).put(eval.getType(), count + 1); + } + } + + return null; // we don't capture any interesting sites + } + + private final int perSampleMean(VariantContext.Type type) { + long sum = 0; + for ( Map count : countsPerSample.values() ) { + sum += count.get(type); + } + return (int)(Math.round(sum / (1.0 * countsPerSample.size()))); + } + + private final double noveltyRate(VariantContext.Type type) { + int all = allVariantCounts.get(type); + int known = knownVariantCounts.get(type); + int novel = all - known; + return (novel / (1.0 * all)); + } + + public void finalizeEvaluation() { + nSNPs = allVariantCounts.get(VariantContext.Type.SNP); + nIndels = allVariantCounts.get(VariantContext.Type.INDEL); + nSVs = allVariantCounts.get(VariantContext.Type.SYMBOLIC); + + nSNPsPerSample = perSampleMean(VariantContext.Type.SNP); + nIndelsPerSample = perSampleMean(VariantContext.Type.INDEL); + nSVsPerSample = perSampleMean(VariantContext.Type.SYMBOLIC); + + SNPNoveltyRate = noveltyRate(VariantContext.Type.SNP); + IndelNoveltyRate = noveltyRate(VariantContext.Type.INDEL); + SVNoveltyRate = noveltyRate(VariantContext.Type.SYMBOLIC); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index e276adc32..b2b6d4165 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; /** @@ -15,8 +16,11 @@ public class EvalRod extends VariantStratifier implements RequiredStratification @Override public void initialize() { states = new ArrayList(); - for ( RodBinding rod : getVariantEvalWalker().getEvals() ) + for ( RodBinding rod : getVariantEvalWalker().getEvals() ) { states.add(rod.getName()); + if ( getVariantEvalWalker().mergeEvals ) + break; + } } public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java new file mode 100644 index 000000000..bf001588a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; + +import net.sf.picard.util.IntervalTree; +import org.apache.log4j.Logger; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.interval.IntervalUtils; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; + +/** + * Stratifies the variants by whether they overlap an interval in the set provided on the command line. + * + * The primary use of this stratification is to provide a mechanism to divide asssessment of a call set up + * by whether a variant overlaps an interval or not. I use this to differentiate between variants occurring + * in CCDS exons vs. those in non-coding regions, in the 1000G call set, using a command line that looks like: + * + * -T VariantEval -R human_g1k_v37.fasta -eval 1000G.vcf -stratIntervals:BED ccds.bed -ST IntervalStratification + * + * Note that the overlap algorithm properly handles symbolic alleles with an INFO field END value. In order to + * safely use this module you should provide entire contigs worth of variants, and let the interval strat decide + * overlap, as opposed to using -L which will not properly work with symbolic variants. + */ +public class IntervalStratification extends VariantStratifier { + final protected static Logger logger = Logger.getLogger(IntervalStratification.class); + final Map> intervalTreeByContig = new HashMap>(); + + @Override + public void initialize() { + final List locs = getVariantEvalWalker().getIntervals(); + + if ( locs.isEmpty() ) + throw new UserException.BadArgumentValue("stratIntervals", "Contains no intervals. Perhaps the file is malformed or empty?"); + + logger.info(String.format("Creating IntervalStratification containing %d intervals covering %d bp", + locs.size(), IntervalUtils.intervalSize(locs))); + + // set up the map from contig -> interval tree + for ( final String contig : getVariantEvalWalker().getContigNames() ) + intervalTreeByContig.put(contig, new IntervalTree()); + + for ( final GenomeLoc loc : locs ) { + intervalTreeByContig.get(loc.getContig()).put(loc.getStart(), loc.getStop(), true); + } + + states = new ArrayList(Arrays.asList("all", "overlaps.intervals", "outside.intervals")); + } + + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + final ArrayList relevantStates = new ArrayList(Arrays.asList("all")); + + if (eval != null) { + final GenomeLoc loc = getVariantEvalWalker().getGenomeLocParser().createGenomeLoc(eval, true); + IntervalTree intervalTree = intervalTreeByContig.get(loc.getContig()); + IntervalTree.Node node = intervalTree.minOverlapper(loc.getStart(), loc.getStop()); + //logger.info(String.format("Overlap %s found %s", loc, node)); + relevantStates.add( node != null ? "overlaps.intervals" : "outside.intervals"); + } + + return relevantStates; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 6a057a456..aa8c6cfb9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -312,12 +312,20 @@ public class VariantEvalUtils { * * @return the mapping of track to VC list that should be populated */ - public HashMap, HashMap>> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, List> tracks, boolean byFilter, boolean subsetBySample, boolean trackPerSample) { + public HashMap, HashMap>> + bindVariantContexts(RefMetaDataTracker tracker, + ReferenceContext ref, + List> tracks, + boolean byFilter, + boolean subsetBySample, + boolean trackPerSample, + boolean mergeTracks) { if ( tracker == null ) return null; HashMap, HashMap>> bindings = new HashMap, HashMap>>(); + RodBinding firstTrack = tracks.isEmpty() ? null : tracks.get(0); for ( RodBinding track : tracks ) { HashMap> mapping = new HashMap>(); @@ -346,7 +354,20 @@ public class VariantEvalUtils { } } - bindings.put(track, mapping); + if ( mergeTracks && bindings.containsKey(firstTrack) ) { + // go through each binding of sample -> value and add all of the bindings from this entry + HashMap> firstMapping = bindings.get(firstTrack); + for ( Map.Entry> elt : mapping.entrySet() ) { + Set firstMappingSet = firstMapping.get(elt.getKey()); + if ( firstMappingSet != null ) { + firstMappingSet.addAll(elt.getValue()); + } else { + firstMapping.put(elt.getKey(), elt.getValue()); + } + } + } else { + bindings.put(track, mapping); + } } return bindings; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index ce03dfffe..573e15971 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -162,7 +162,7 @@ public class CombineVariants extends RodWalker { private boolean sitesOnlyVCF = false; public void initialize() { - Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), null); + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit()); if ( PRIORITY_STRING == null ) { PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index 8d9768681..e10bcbaa0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -35,8 +35,10 @@ import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; import org.broad.tribble.Feature; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * Factory class for creating GenomeLocs @@ -453,6 +455,28 @@ public class GenomeLocParser { return createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd()); } + /** + * Creates a GenomeLoc corresponding to the variant context vc. If includeSymbolicEndIfPossible + * is true, and VC is a symbolic allele the end of the created genome loc will be the value + * of the END info field key, if it exists, or vc.getEnd() if not. + * + * @param vc + * @param includeSymbolicEndIfPossible + * @return + */ + public GenomeLoc createGenomeLoc(final VariantContext vc, boolean includeSymbolicEndIfPossible) { + if ( includeSymbolicEndIfPossible && vc.isSymbolic() ) { + int end = vc.getAttributeAsInt(VCFConstants.END_KEY, vc.getEnd()); + return createGenomeLoc(vc.getChr(), vc.getStart(), end); + } + else + return createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd()); + } + + public GenomeLoc createGenomeLoc(final VariantContext vc) { + return createGenomeLoc(vc, false); + } + /** * create a new genome loc, given the contig name, and a single position. Must be on the reference * diff --git a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java index edc1413ba..68b220aab 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java @@ -150,7 +150,7 @@ public class SampleUtils { // iterate to get all of the sample names - for ( Map.Entry pair : VCFUtils.getVCFHeadersFromRods(toolkit, null).entrySet() ) { + for ( Map.Entry pair : VCFUtils.getVCFHeadersFromRods(toolkit).entrySet() ) { Set vcfSamples = pair.getValue().getGenotypeSamples(); for ( String sample : vcfSamples ) addUniqueSample(samples, sampleOverlapMap, rodNamesToSampleNames, sample, pair.getKey()); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index 2d8421507..5bd6a9b32 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -26,6 +26,8 @@ package org.broadinstitute.sting.utils.codecs.vcf; import org.apache.log4j.Logger; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -41,6 +43,18 @@ public class VCFUtils { */ private VCFUtils() { } + public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, List> rodBindings) { + // Collect the eval rod names + final Set names = new TreeSet(); + for ( final RodBinding evalRod : rodBindings ) + names.add(evalRod.getName()); + return getVCFHeadersFromRods(toolkit, names); + } + + public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit) { + return getVCFHeadersFromRods(toolkit, (Collection)null); + } + public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, Collection rodNames) { Map data = new HashMap(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index cbfe0ceab..3dceb9bd2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -70,7 +70,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("1fefd6cf9c2554d5f886c3998defd4d0") + Arrays.asList("fb926edfd3d811e18b33798a43ef4379") ); executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); } @@ -91,7 +91,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("d470e00a368b5a0468012818994c6a89") + Arrays.asList("26b7d57e3a204ac80a28cb29485b59b7") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -113,7 +113,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("12856e52c2682328f91594089328596c") + Arrays.asList("1df8184062f330bea9da8bacacc5a09d") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } @@ -134,7 +134,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("91610b9240f64e0eb03cfd2602cf57af") + Arrays.asList("927f26414509db9e7c0a2c067d57c949") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); } @@ -155,7 +155,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("e40b77e7ed6581328e373a24b93cd170") + Arrays.asList("e6fddefd95122cabc5a0f0b95bce6d34") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); } @@ -176,7 +176,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("15beaf3823c131cabc5fb0445239f978") + Arrays.asList("df10486dae73a9cf8c647964f51ba3e0") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); } @@ -197,7 +197,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("7ddd4ee74938d229ce5cb7b9b9104abe") + Arrays.asList("524adb0b7ff70e227b8803a88f36713e") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); } @@ -220,7 +220,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("a90f33906a732ef5eb346e559c96ccc1") + Arrays.asList("ef6449789dfc032602458b7c5538a1bc") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); } @@ -245,7 +245,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("2567f90d3d7354850c5a59730ecc6e4f") + Arrays.asList("13b90e94fa82d72bb04a0a5addb27c3f") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); } @@ -264,7 +264,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("fa091aa8967893389c51102fd9f0bebb") + Arrays.asList("8458b9d7803d75aae551fac7dbd152d6") ); executeTest("testFundamentalsCountVariantsNoCompRod", spec); } @@ -277,7 +277,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("f70997b6a3e7fdc89d11e1d61a2463d4")); + 1, Arrays.asList("b954dee127ec4205ed7d33c91aa3e045")); executeTestParallel("testSelect1", spec); } @@ -294,7 +294,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("407682de41dcf139ea635e9cda21b912")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ae0027197547731a9a5c1eec5fbe0221")); executeTestParallel("testCompVsEvalAC",spec); } @@ -324,7 +324,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --dbsnp " + b37dbSNP132 + " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("424c9d438b1faa59b2c29413ba32f37b")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("835b44fc3004cc975c968c9f92ed25d6")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -336,7 +336,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("18fa0b89ebfff51141975d7e4ce7a159")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("f0e003f1293343c3210ae95e8936b19a")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -414,8 +414,29 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("da65fc8f0d0eeaf0a0b06a07f444bb8e") + Arrays.asList("924b6123edb9da540d0abc66f6f33e16") ); executeTest("testAlleleCountStrat", spec); } + + @Test + public void testIntervalStrat() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-eval " + testDir + "/withSymbolic.b37.vcf", + "-noEV", + "-EV CountVariants", + "-noST", + "-stratIntervals " + testDir + "/overlapTest.bed", + "-ST IntervalStratification", + "-L 20", + "-o %s" + ), + 1, + Arrays.asList("9794e2dba205c6929dc89899fdf0bf6b") + ); + executeTest("testIntervalStrat", spec); + } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index e8091cde7..768eab7e4 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -129,9 +129,11 @@ class QCommandLine extends CommandLineProgram with Logging { logger.info("Writing JobLogging GATKReport to file " + reportFile) QJobReport.printReport(qGraph.getFunctionsAndStatus(script.functions), reportFile) - val pdfFile = new File(jobStringName + ".pdf") - logger.info("Plotting JobLogging GATKReport to file " + pdfFile) - QJobReport.plotReport(reportFile, pdfFile) + if ( settings.run ) { + val pdfFile = new File(jobStringName + ".pdf") + logger.info("Plotting JobLogging GATKReport to file " + pdfFile) + QJobReport.plotReport(reportFile, pdfFile) + } } } } diff --git a/public/testdata/overlapTest.bed b/public/testdata/overlapTest.bed new file mode 100644 index 000000000..6859f1fdc --- /dev/null +++ b/public/testdata/overlapTest.bed @@ -0,0 +1,3 @@ +20 315000 315100 # should overlap 2 in withSymbolic.vcf at 315006 and 315072 +20 316955 316959 # should overlap only deletion variant at 316952 +20 317900 400000 # should overlap only the symbolic variant at 317173 diff --git a/public/testdata/withSymbolic.b37.vcf b/public/testdata/withSymbolic.b37.vcf new file mode 100644 index 000000000..4974f1229 --- /dev/null +++ b/public/testdata/withSymbolic.b37.vcf @@ -0,0 +1,96 @@ +##fileformat=VCFv4.1 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##ALT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO +20 315006 . A G 100 PASS LCSNP;EXSNP;BAVGPOST=0.998;BRSQ=0.721;LDAF=0.0025;AVGPOST=0.9978;RSQ=0.6449;ERATE=0.0008;THETA=0.0006;AC=4;AN=2184 +20 315072 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.996;LDAF=0.0057;AVGPOST=0.9996;RSQ=0.9743;ERATE=0.0003;THETA=0.0016;AC=12;AN=2184 +20 315162 . T G 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.950;LDAF=0.0005;AVGPOST=0.9998;RSQ=0.8078;ERATE=0.0003;THETA=0.0004;AC=1;AN=2184 +20 315168 rs71327439 G GA 7142 PASS INDEL;BAVGPOST=0.999;BRSQ=0.990;LDAF=0.0575;AVGPOST=0.9985;RSQ=0.9891;ERATE=0.0003;THETA=0.0004;AC=125;AN=2184 +20 315201 . A G 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.965;LDAF=0.0005;AVGPOST=0.9999;RSQ=0.8599;ERATE=0.0003;THETA=0.0008;AC=1;AN=2184 +20 315214 . T C 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.974;LDAF=0.0009;AVGPOST=0.9990;RSQ=0.5860;ERATE=0.0003;THETA=0.0005;AC=1;AN=2184 +20 315270 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.999;LDAF=0.0557;AVGPOST=0.9992;RSQ=0.9950;ERATE=0.0003;THETA=0.0004;AC=121;AN=2184 +20 315279 . A G 100 PASS LCSNP;EXSNP;BAVGPOST=0.999;BRSQ=0.991;LDAF=0.0572;AVGPOST=0.9990;RSQ=0.9926;ERATE=0.0003;THETA=0.0013;AC=125;AN=2184 +20 315320 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.995;LDAF=0.0028;AVGPOST=0.9998;RSQ=0.9594;ERATE=0.0005;THETA=0.0003;AC=6;AN=2184 +20 315322 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=0.999;BRSQ=0.814;LDAF=0.0007;AVGPOST=0.9994;RSQ=0.6510;ERATE=0.0003;THETA=0.0004;AC=1;AN=2184 +20 315431 . A T 100 PASS LCSNP;EXSNP;BAVGPOST=0.987;BRSQ=0.864;LDAF=0.0431;AVGPOST=0.9873;RSQ=0.8675;ERATE=0.0006;THETA=0.0010;AC=86;AN=2184 +20 315481 . T A 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.998;LDAF=0.0619;AVGPOST=0.9993;RSQ=0.9948;ERATE=0.0003;THETA=0.0007;AC=135;AN=2184 +20 315490 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.984;LDAF=0.0138;AVGPOST=0.9971;RSQ=0.9260;ERATE=0.0003;THETA=0.0046;AC=31;AN=2184 +20 315523 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.822;LDAF=0.0015;AVGPOST=0.9988;RSQ=0.6777;ERATE=0.0005;THETA=0.0003;AC=2;AN=2184 +20 315547 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=0.999;BRSQ=0.893;LDAF=0.0026;AVGPOST=0.9995;RSQ=0.9024;ERATE=0.0003;THETA=0.0004;AC=5;AN=2184 +20 315549 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=0.999;BRSQ=0.888;LDAF=0.0029;AVGPOST=0.9988;RSQ=0.8565;ERATE=0.0003;THETA=0.0013;AC=5;AN=2184 +20 315551 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.829;LDAF=0.0008;AVGPOST=0.9992;RSQ=0.5723;ERATE=0.0003;THETA=0.0012;AC=1;AN=2184 +20 315704 . G C 100 PASS LCSNP;EXSNP;BAVGPOST=0.998;BRSQ=0.945;LDAF=0.0184;AVGPOST=0.9978;RSQ=0.9523;ERATE=0.0003;THETA=0.0017;AC=40;AN=2184 +20 315798 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.773;LDAF=0.0012;AVGPOST=0.9985;RSQ=0.4929;ERATE=0.0003;THETA=0.0005;AC=1;AN=2184 +20 315842 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.979;LDAF=0.0110;AVGPOST=0.9991;RSQ=0.9673;ERATE=0.0003;THETA=0.0005;AC=23;AN=2184 +20 315876 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.926;LDAF=0.0018;AVGPOST=0.9988;RSQ=0.7731;ERATE=0.0003;THETA=0.0007;AC=3;AN=2184 +20 316028 . G C 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.999;LDAF=0.0714;AVGPOST=0.9997;RSQ=0.9982;ERATE=0.0003;THETA=0.0003;AC=156;AN=2184 +20 316055 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=0.999;BRSQ=0.997;LDAF=0.1006;AVGPOST=0.9993;RSQ=0.9969;ERATE=0.0003;THETA=0.0007;AC=220;AN=2184 +20 316137 . G C 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.976;LDAF=0.0037;AVGPOST=0.9982;RSQ=0.7861;ERATE=0.0004;THETA=0.0009;AC=6;AN=2184 +20 316142 . C A 100 PASS LCSNP;EXSNP;BAVGPOST=0.998;BRSQ=0.793;LDAF=0.0033;AVGPOST=0.9980;RSQ=0.7527;ERATE=0.0003;THETA=0.0003;AC=6;AN=2184 +20 316143 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=0.999;BRSQ=0.839;LDAF=0.0034;AVGPOST=0.9984;RSQ=0.8054;ERATE=0.0003;THETA=0.0003;AC=6;AN=2184 +20 316211 . C G 100 PASS LCSNP;EXSNP;BAVGPOST=0.997;BRSQ=0.976;LDAF=0.0565;AVGPOST=0.9983;RSQ=0.9872;ERATE=0.0003;THETA=0.0010;AC=124;AN=2184 +20 316285 . A AT 5514 PASS INDEL;BAVGPOST=0.999;BRSQ=0.993;LDAF=0.0552;AVGPOST=0.9978;RSQ=0.9829;ERATE=0.0004;THETA=0.0005;AC=119;AN=2184 +20 316295 . A G 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.808;LDAF=0.0021;AVGPOST=0.9980;RSQ=0.6390;ERATE=0.0003;THETA=0.0008;AC=4;AN=2184 +20 316481 . G T 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=1.000;LDAF=0.0499;AVGPOST=0.9997;RSQ=0.9970;ERATE=0.0003;THETA=0.0007;AC=109;AN=2184 +20 316488 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.897;LDAF=0.0011;AVGPOST=0.9997;RSQ=0.8509;ERATE=0.0003;THETA=0.0006;AC=2;AN=2184 +20 316553 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.945;LDAF=0.0007;AVGPOST=0.9996;RSQ=0.7074;ERATE=0.0003;THETA=0.0004;AC=1;AN=2184 +20 316659 . T C 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.998;LDAF=0.0497;AVGPOST=0.9995;RSQ=0.9960;ERATE=0.0003;THETA=0.0007;AC=109;AN=2184 +20 316691 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.985;LDAF=0.0058;AVGPOST=0.9989;RSQ=0.9301;ERATE=0.0003;THETA=0.0003;AC=13;AN=2184 +20 316700 . A G 100 PASS LCSNP;EXSNP;BAVGPOST=0.999;BRSQ=0.737;LDAF=0.0030;AVGPOST=0.9971;RSQ=0.5700;ERATE=0.0009;THETA=0.0016;AC=4;AN=2184 +20 316725 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.853;LDAF=0.0011;AVGPOST=0.9995;RSQ=0.7944;ERATE=0.0003;THETA=0.0004;AC=2;AN=2184 +20 316770 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.966;LDAF=0.0017;AVGPOST=0.9991;RSQ=0.7543;ERATE=0.0005;THETA=0.0005;AC=3;AN=2184 +20 316813 . GTTC G 1701 PASS INDEL;BAVGPOST=0.995;BRSQ=0.850;LDAF=0.0156;AVGPOST=0.9987;RSQ=0.9611;ERATE=0.0003;THETA=0.0006;AC=34;AN=2184 +20 316824 rs11472305 T TTTC 5129 PASS INDEL;BAVGPOST=0.979;BRSQ=0.863;LDAF=0.0697;AVGPOST=0.9782;RSQ=0.8837;ERATE=0.0016;THETA=0.0006;AC=153;AN=2184 +20 316839 . CT C 1122 PASS INDEL;BAVGPOST=0.980;BRSQ=0.804;LDAF=0.0505;AVGPOST=0.9743;RSQ=0.8100;ERATE=0.0040;THETA=0.0009;AC=104;AN=2184 +20 316841 . TTC T 1096 PASS INDEL;BAVGPOST=0.981;BRSQ=0.824;LDAF=0.0491;AVGPOST=0.9783;RSQ=0.8367;ERATE=0.0029;THETA=0.0005;AC=105;AN=2184 +20 316842 . TCC T 110 PASS INDEL;BAVGPOST=0.983;BRSQ=0.592;LDAF=0.0203;AVGPOST=0.9772;RSQ=0.5576;ERATE=0.0022;THETA=0.0005;AC=30;AN=2184 +20 316845 . T C 100 PASS LCSNP;EXSNP;BAVGPOST=0.999;BRSQ=0.645;LDAF=0.0015;AVGPOST=0.9989;RSQ=0.6666;ERATE=0.0003;THETA=0.0010;AC=2;AN=2184 +20 316853 . T C 100 PASS LCSNP;EXSNP;BAVGPOST=0.990;BRSQ=0.922;LDAF=0.0688;AVGPOST=0.9742;RSQ=0.8456;ERATE=0.0049;THETA=0.0006;AC=133;AN=2184 +20 316882 rs11479165 CT C 145 PASS INDEL;BAVGPOST=0.991;BRSQ=0.484;LDAF=0.0074;AVGPOST=0.9920;RSQ=0.5655;ERATE=0.0005;THETA=0.0014;AC=9;AN=2184 +20 316889 . T TTC 108 PASS INDEL;BAVGPOST=0.978;BRSQ=0.235;LDAF=0.0521;AVGPOST=0.9304;RSQ=0.4479;ERATE=0.0035;THETA=0.0016;AC=63;AN=2184 +20 316901 . T TTC 272 PASS INDEL;BAVGPOST=0.979;BRSQ=0.510;LDAF=0.0363;AVGPOST=0.9527;RSQ=0.4348;ERATE=0.0071;THETA=0.0003;AC=34;AN=2184 +20 316917 . CT C 67 PASS INDEL;BAVGPOST=0.983;BRSQ=0.483;LDAF=0.0265;AVGPOST=0.9828;RSQ=0.7509;ERATE=0.0006;THETA=0.0011;AC=51;AN=2184 +20 316918 rs112071142 TTTC T 3049 PASS INDEL;BAVGPOST=0.980;BRSQ=0.801;LDAF=0.0588;AVGPOST=0.9761;RSQ=0.8444;ERATE=0.0026;THETA=0.0005;AC=120;AN=2184 +20 316924 . CT C 8 PASS INDEL;BAVGPOST=0.999;BRSQ=0.581;LDAF=0.0053;AVGPOST=0.9932;RSQ=0.5513;ERATE=0.0004;THETA=0.0011;AC=6;AN=2184 +20 316936 . T C 100 PASS LCSNP;EXSNP;BAVGPOST=0.994;BRSQ=0.651;LDAF=0.0098;AVGPOST=0.9884;RSQ=0.5505;ERATE=0.0018;THETA=0.0005;AC=13;AN=2184 +20 316939 . T C 100 PASS LCSNP;EXSNP;BAVGPOST=0.997;BRSQ=0.720;LDAF=0.0071;AVGPOST=0.9899;RSQ=0.4845;ERATE=0.0010;THETA=0.0033;AC=8;AN=2184 +20 316952 . CTCTTCCTCTTCT C 15835 PASS INDEL;BAVGPOST=0.891;BRSQ=0.667;LDAF=0.1650;AVGPOST=0.9042;RSQ=0.7274;ERATE=0.0037;THETA=0.0008;AC=294;AN=2184 +20 317003 . TCC T 333 PASS INDEL;BAVGPOST=0.890;BRSQ=0.114;LDAF=0.3450;AVGPOST=0.5526;RSQ=0.1674;ERATE=0.0300;THETA=0.0011;AC=350;AN=2184 +20 317022 rs111424933 TTTC T 4776 PASS INDEL;BAVGPOST=0.982;BRSQ=0.865;LDAF=0.0561;AVGPOST=0.9837;RSQ=0.8889;ERATE=0.0017;THETA=0.0004;AC=110;AN=2184 +20 317057 . C A 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.775;LDAF=0.0014;AVGPOST=0.9985;RSQ=0.6049;ERATE=0.0003;THETA=0.0002;AC=3;AN=2184 +20 317135 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=0.995;BRSQ=0.992;LDAF=0.0489;AVGPOST=0.9964;RSQ=0.9749;ERATE=0.0004;THETA=0.0025;AC=106;AN=2184 +20 317173 MERGED_DEL_2_99440 A . . SV;BAVGPOST=0.998;BRSQ=0.577;LDAF=0.0018;AVGPOST=0.9990;RSQ=0.7465;ERATE=0.0003;THETA=0.0007;CIEND=-61,92;CIPOS=-84,73;END=319201;SOURCE=BreakDancer_317182_319211,GenomeStrip_317164_319190;SVTYPE=DEL;AC=2;AN=2184 +20 317174 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=0.995;BRSQ=0.991;LDAF=0.0502;AVGPOST=0.9962;RSQ=0.9727;ERATE=0.0005;THETA=0.0007;AC=107;AN=2184 +20 317266 . T C 100 PASS LCSNP;EXSNP;BAVGPOST=0.999;BRSQ=0.999;LDAF=0.1748;AVGPOST=0.9988;RSQ=0.9973;ERATE=0.0003;THETA=0.0005;AC=383;AN=2184 +20 317448 . A G 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.999;LDAF=0.0068;AVGPOST=0.9988;RSQ=0.9405;ERATE=0.0003;THETA=0.0002;AC=16;AN=2184 +20 317491 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=0.999;BRSQ=0.991;LDAF=0.0609;AVGPOST=0.9993;RSQ=0.9939;ERATE=0.0003;THETA=0.0004;AC=133;AN=2184 +20 317546 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.837;LDAF=0.0007;AVGPOST=0.9994;RSQ=0.6154;ERATE=0.0003;THETA=0.0012;AC=1;AN=2184 +20 317578 . C T 100 PASS LCSNP;EXSNP;BAVGPOST=0.996;BRSQ=0.882;LDAF=0.0180;AVGPOST=0.9981;RSQ=0.9606;ERATE=0.0003;THETA=0.0004;AC=40;AN=2184 +20 317658 . T C 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=1.000;LDAF=0.0609;AVGPOST=0.9998;RSQ=0.9980;ERATE=0.0003;THETA=0.0005;AC=133;AN=2184 +20 317676 . T C 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=1.000;LDAF=0.0608;AVGPOST=0.9999;RSQ=0.9992;ERATE=0.0003;THETA=0.0004;AC=133;AN=2184 +20 317710 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.952;LDAF=0.0014;AVGPOST=0.9997;RSQ=0.8880;ERATE=0.0004;THETA=0.0008;AC=3;AN=2184 +20 317824 . G A 100 PASS LCSNP;EXSNP;BAVGPOST=1.000;BRSQ=0.999;LDAF=0.0555;AVGPOST=0.9993;RSQ=0.9947;ERATE=0.0003;THETA=0.0009;AC=121;AN=2184