From 3b51d9106aa4035a0bb6743ae025d8620eaf6247 Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Wed, 21 Sep 2011 16:40:29 -0400 Subject: [PATCH 01/68] Adding in likelihood calculations for mendelian violations. Also fixing a minor and rare bug in SelectVariants when specifying family structure on the command line. --- .../walkers/annotator/VariantAnnotator.java | 6 +++ .../walkers/variantutils/SelectVariants.java | 2 +- .../sting/utils/MendelianViolation.java | 42 +++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index fb3dbc3cf..27a750f99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) protected boolean indelsOnly = false; + @Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation") + public String familyStr = null; + + @Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio") + public double minGenotypeQualityP = 0.0; + private VariantAnnotatorEngine engine; private Collection indelBufferContext; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 018c4dcc2..e1de06ec0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -452,7 +452,7 @@ public class SelectVariants extends RodWalker { throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); } } else - mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD)); + mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); } else if (!FAMILY_STRUCTURE.isEmpty()) { mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index c6a07b5ce..8da118174 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.utils; +import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.regex.Matcher; @@ -32,6 +34,9 @@ public class MendelianViolation { private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); + static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 }; + static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 }; + public String getSampleMom() { return sampleMom; @@ -168,4 +173,41 @@ public class MendelianViolation { return true; } + /** + * @return the likelihood ratio for a mendelian violation + */ + public double violationLikelihoodRatio(VariantContext vc) { + double[] logLikAssignments = new double[27]; + // the matrix to set up is + // MOM DAD CHILD + // |- AA + // AA AA | AB + // |- BB + // |- AA + // AA AB | AB + // |- BB + // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs + double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector(); + double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector(); + double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector(); + int offset = 0; + for ( int oMom = 0; oMom < 3; oMom++ ) { + for ( int oDad = 0; oDad < 3; oDad++ ) { + for ( int oChild = 0; oChild < 3; oChild ++ ) { + logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild]; + } + } + } + double[] mvLiks = new double[12]; + double[] nonMVLiks = new double[15]; + for ( int i = 0; i < 12; i ++ ) { + mvLiks[i] = logLikAssignments[mvOffsets[i]]; + } + + for ( int i = 0; i < 15; i++) { + nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]]; + } + + return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks); + } } From 70335b2b0a967f31b1aa063ace4845bff413b086 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 21 Sep 2011 17:12:01 -0400 Subject: [PATCH 03/68] Hard clipping soft clipped reads to fix misalignments. Pre-softclipped reads (with high qual) are a complicated event to deal with in the Reduced Reads environment. I chose to hard clip them out for now and added a todo item to bring them back on in the future, perhaps as a variant region. --- .../sting/utils/clipreads/ReadClipper.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index a8a3fad9e..9b9ce4fdf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.clipreads; import com.google.java.contract.Requires; +import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; @@ -8,6 +9,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; /** @@ -128,6 +130,39 @@ public class ReadClipper { return this.clipRead(ClippingRepresentation.HARDCLIP_BASES); } + public SAMRecord hardClipSoftClippedBases () { + int readIndex = 0; + int cutLeft = -1; // first position to hard clip (inclusive) + int cutRight = -1; // first position to hard clip (inclusive) + boolean rightTail = false; // trigger to stop clipping the left tail and start cutting the right tail + + for (CigarElement cigarElement : read.getCigar().getCigarElements()) { + if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) { + if (rightTail) { + cutRight = readIndex; + } + else { + cutLeft = readIndex + cigarElement.getLength() - 1; + } + } + else + rightTail = true; + + if (cigarElement.getOperator().consumesReadBases()) + readIndex += cigarElement.getLength(); + } + + // It is extremely important that we cut the end first otherwise the read coordinates change. + if (cutRight >= 0) + this.addOp(new ClippingOp(cutRight, read.getReadLength() - 1)); + if (cutLeft >= 0) + this.addOp(new ClippingOp(0, cutLeft)); + + return clipRead(ClippingRepresentation.HARDCLIP_BASES); + } + + + /** * Return a new read corresponding to this.read that's been clipped according to ops, if any are present. * From faff6e401998ca39ed00553185fcc20e312f3130 Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Wed, 21 Sep 2011 18:15:23 -0400 Subject: [PATCH 04/68] Failed to commit changes to the GATKReport required for more easy access when using the files as data sources (read: histograms) for walkers --- .../sting/gatk/report/GATKReportColumns.java | 14 +++++++++++++- .../sting/gatk/report/GATKReportTable.java | 4 ++++ .../sting/gatk/report/GATKReportVersion.java | 0 3 files changed, 17 insertions(+), 1 deletion(-) mode change 100644 => 100755 public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java mode change 100644 => 100755 public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java old mode 100644 new mode 100755 index a33631c85..a73123b6c --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java @@ -24,12 +24,14 @@ package org.broadinstitute.sting.gatk.report; +import org.broadinstitute.sting.utils.collections.Pair; + import java.util.*; /** * Tracks a linked list of GATKReportColumn in order by name. */ -public class GATKReportColumns extends LinkedHashMap { +public class GATKReportColumns extends LinkedHashMap implements Iterable { private List columnNames = new ArrayList(); /** @@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap { columnNames.add(key); return super.put(key, value); } + + @Override + public Iterator iterator() { + return new Iterator() { + int offset = 0; + public boolean hasNext() { return offset < columnNames.size() ; } + public GATKReportColumn next() { return getByIndex(offset++); } + public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); } + }; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 3e3aa29a7..2fd5ad7e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -286,6 +286,10 @@ public class GATKReportTable { } } + public boolean containsKey(Object primaryKey) { + return primaryKeyColumn.contains(primaryKey); + } + /** * Set the value for a given position in the table * diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java old mode 100644 new mode 100755 From f9cdc119afec2d0d4dd76e04f82e138c9cf96dca Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Wed, 21 Sep 2011 18:16:42 -0400 Subject: [PATCH 05/68] Added a method to ReadUtils that converts reads of the form 10S20M10S to 40M (just unclips the soft-clips). Be careful when using this - if you're writing a bam file it will be potentially written out of order (since the previous alignment start was at the M, not the S). --- .../sting/utils/sam/ReadUtils.java | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) mode change 100644 => 100755 public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java old mode 100644 new mode 100755 index 5d3ef3086..60c9c1780 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -681,6 +681,9 @@ public class ReadUtils { @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { + if ( read.getCigar().numCigarElements() == 1 && read.getCigar().getCigarElement(0).getOperator().equals(CigarOperator.INSERTION)) { + return read.getUnclippedEnd(); + } int stop = read.getUnclippedStart(); if (readIsEntirelyInsertion(read)) @@ -787,5 +790,47 @@ public class ReadUtils { return readBases; } + public static SAMRecord unclipSoftClippedBases(SAMRecord rec) { + int newReadStart = rec.getAlignmentStart(); + int newReadEnd = rec.getAlignmentEnd(); + List newCigarElements = new ArrayList(rec.getCigar().getCigarElements().size()); + int heldOver = -1; + boolean sSeen = false; + for ( CigarElement e : rec.getCigar().getCigarElements() ) { + if ( e.getOperator().equals(CigarOperator.S) ) { + newCigarElements.add(new CigarElement(e.getLength(),CigarOperator.M)); + if ( sSeen ) { + newReadEnd += e.getLength(); + sSeen = true; + } else { + newReadStart -= e.getLength(); + } + } else { + newCigarElements.add(e); + } + } + // merge duplicate operators together + int idx = 0; + List finalCigarElements = new ArrayList(rec.getCigar().getCigarElements().size()); + while ( idx < newCigarElements.size() -1 ) { + if ( newCigarElements.get(idx).getOperator().equals(newCigarElements.get(idx+1).getOperator()) ) { + int combSize = newCigarElements.get(idx).getLength(); + int offset = 0; + while ( idx + offset < newCigarElements.size()-1 && newCigarElements.get(idx+offset).getOperator().equals(newCigarElements.get(idx+1+offset).getOperator()) ) { + combSize += newCigarElements.get(idx+offset+1).getLength(); + offset++; + } + finalCigarElements.add(new CigarElement(combSize,newCigarElements.get(idx).getOperator())); + idx = idx + offset -1; + } else { + finalCigarElements.add(newCigarElements.get(idx)); + } + idx++; + } + rec.setCigar(new Cigar(finalCigarElements)); + rec.setAlignmentStart(newReadStart); + + return rec; + } } From 5d0f284305ec4274ec255474a1cd536574fc1a39 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 21 Sep 2011 20:26:28 -0400 Subject: [PATCH 06/68] Fixing exome specific arguments to the VQSR in the methods development calling pipeline --- .../qscripts/MethodsDevelopmentCallingPipeline.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala index 29ee2769b..d187c1e1a 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala @@ -266,13 +266,18 @@ class MethodsDevelopmentCallingPipeline extends QScript { this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" ) this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" ) this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS") - if(t.nSamples >= 10) { + if(t.nSamples >= 10) { // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate this.use_annotation ++= List("InbreedingCoeff") } if(!t.isExome) { this.use_annotation ++= List("DP") - } else { + } else { // exome specific parameters + this.resource :+= new TaggedFile( badSites_1000G, "bad=true,prior=2.0" ) this.mG = 6 + if(t.nSamples <= 3) { // very few exome samples means very few variants + this.mG = 4 + this.percentBad = 0.04 + } } this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile } this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile } From 8f8b59a932923d29fdeb91a7ef90d1eef24ca1fd Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 21 Sep 2011 22:23:28 -0400 Subject: [PATCH 07/68] My interpretation of the VCF spec is that the FORMAT field should only be present if there is genotype/sample data. So the VCFCodec now throws an exception when it encounters such a case. I had to fix one of the integration test VCFs. --- .../utils/codecs/vcf/AbstractVCFCodec.java | 38 ++++++++++--------- .../sting/utils/codecs/vcf/VCFHeader.java | 17 +++------ .../sting/utils/exceptions/UserException.java | 6 +++ 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 83c7083d0..43b07476d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -115,15 +115,21 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, } arrayIndex++; } + + boolean sawFormatTag = false; if ( arrayIndex < strings.length ) { if ( !strings[arrayIndex].equals("FORMAT") ) throw new TribbleException.InvalidHeader("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'"); + sawFormatTag = true; arrayIndex++; } - while (arrayIndex < strings.length) + while ( arrayIndex < strings.length ) auxTags.add(strings[arrayIndex++]); + if ( sawFormatTag && auxTags.size() == 0 ) + throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data"); + } else { if ( str.startsWith("##INFO=") ) { VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version); @@ -200,28 +206,24 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, * @return a VariantContext */ public Feature decode(String line) { - return reallyDecode(line); - } + // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line + if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null; - private Feature reallyDecode(String line) { - // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line - if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null; + // our header cannot be null, we need the genotype sample names and counts + if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record"); - // our header cannot be null, we need the genotype sample names and counts - if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record"); + if (parts == null) + parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)]; - if (parts == null) - parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)]; + int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true); - int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true); + // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data) + if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) || + (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) ) + throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + + " tokens, and saw " + nParts + " )", lineNo); - // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data) - if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) || - (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) ) - throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + - " tokens, and saw " + nParts + " )", lineNo); - - return parseVCFLine(parts); + return parseVCFLine(parts); } protected void generateException(String message) { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index fd1c74993..66e11bc1e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -35,9 +35,6 @@ public class VCFHeader { // the header string indicator public static final String HEADER_INDICATOR = "#"; - /** do we have genotying data? */ - private boolean hasGenotypingData = false; - // were the input samples sorted originally (or are we sorting them)? private boolean samplesWereAlreadySorted = true; @@ -57,17 +54,15 @@ public class VCFHeader { * create a VCF header, given a list of meta data and auxillary tags * * @param metaData the meta data associated with this header - * @param genotypeSampleNames the genotype format field, and the sample names + * @param genotypeSampleNames the sample names */ public VCFHeader(Set metaData, Set genotypeSampleNames) { mMetaData = new TreeSet(); if ( metaData != null ) mMetaData.addAll(metaData); - for (String col : genotypeSampleNames) { - if (!col.equals("FORMAT")) - mGenotypeSampleNames.add(col); - } - if (genotypeSampleNames.size() > 0) hasGenotypingData = true; + + mGenotypeSampleNames.addAll(genotypeSampleNames); + loadVCFVersion(); loadMetaDataMaps(); @@ -157,7 +152,7 @@ public class VCFHeader { * @return true if we have genotyping columns, false otherwise */ public boolean hasGenotypingData() { - return hasGenotypingData; + return mGenotypeSampleNames.size() > 0; } /** @@ -171,7 +166,7 @@ public class VCFHeader { /** @return the column count */ public int getColumnCount() { - return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0); + return HEADER_FIELDS.values().length + (hasGenotypingData() ? mGenotypeSampleNames.size() + 1 : 0); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 274c64f42..70f7387f4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -174,6 +174,12 @@ public class UserException extends ReviewedStingException { } } + public static class MalformedVCFHeader extends UserException { + public MalformedVCFHeader(String message) { + super(String.format("The provided VCF file has a malformed header: %s", message)); + } + } + public static class ReadMissingReadGroup extends MalformedBAM { public ReadMissingReadGroup(SAMRecord read) { super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName())); From b8ea9ceb68f3ded503bac08cd95e3a15237d5930 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 21 Sep 2011 22:43:31 -0400 Subject: [PATCH 08/68] Adding integration test that uses the -V:dbsnp binding to make sure it won't fail later on if someone messes with Tribble. --- .../SelectVariantsIntegrationTest.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 20409d4ca..e4ded491b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -16,7 +16,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant:VCF3 " + testfile), + baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile), 1, Arrays.asList("d18516c1963802e92cb9e425c0b75fd6") ); @@ -30,7 +30,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s -NO_HEADER -xl_sn A -xl_sf " + samplesFile + " --variant:VCF3 " + testfile, + "-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s -NO_HEADER -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile, 1, Arrays.asList("730f021fd6ecf1d195dabbee2e233bfd") ); @@ -43,7 +43,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testfile = validationDataLocation + "test.dup.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -sn A -sn B -sn C --variant:VCF3 " + testfile), + baseTestString(" -sn A -sn B -sn C --variant " + testfile), 1, Arrays.asList("b74038779fe6485dbb8734ae48178356") ); @@ -56,7 +56,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant:VCF " + b37hapmapGenotypes + " -disc:VCF " + testFile + " -o %s -NO_HEADER", + "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s -NO_HEADER", 1, Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e") ); @@ -69,7 +69,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant " + testFile + " -o %s -NO_HEADER", + "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc " + b37hapmapGenotypes + " --variant " + testFile + " -o %s -NO_HEADER", 1, Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a") ); @@ -90,16 +90,16 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testVariantTypeSelection--" + testFile, spec); } - @Test(enabled=false) - public void testRemovePLs() { + @Test + public void testUsingDbsnpName() { String testFile = validationDataLocation + "combine.3.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s -NO_HEADER", + "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s -NO_HEADER", 1, - Arrays.asList("") + Arrays.asList("167a1265df820978a74c267df44d5c43") ); - executeTest("testWithPLs--" + testFile, spec); + executeTest("testUsingDbsnpName--" + testFile, spec); } } From f81a41b889faec164f2034a70427a3bbd1934db2 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 10:30:25 -0400 Subject: [PATCH 09/68] Updating MD5s for CombineVariants -- Old version had broken RSIDs, new version is fixed. No longer see rs1234,. as it is now just rs1234 --- .../variantutils/CombineVariantsIntegrationTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 74ff850a4..b65de9d36 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -89,8 +89,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); } @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format - @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "d76cd5b3ced7745d42fe0af39ce0b32e"); } + @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "96941ee177b0614a9879af0ac3218963"); } // official project VCF files in tabix format + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a8a6e7589f22e0b6c5d222066b9a2093"); } @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); } @@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("1de95f91ca15d2a8856de35dee0ce33e")); + Arrays.asList("212d9d3df10bb29e2c7fb226da422dc0")); executeTest("threeWayWithRefs", spec); } From 982c47bfa7ce48e2d4d53a445cff958212354ab0 Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Thu, 22 Sep 2011 10:58:26 -0400 Subject: [PATCH 10/68] Remove duplicate effort in ReadUtils (with apologies to Mauricio) Big (but not major) cleanup of code in ILG - mostly excising the old likelihood model Activated the early-abort check for ILG. I think it should be better this way. --- .../java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 60c9c1780..c328bbc5a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -681,9 +681,6 @@ public class ReadUtils { @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { - if ( read.getCigar().numCigarElements() == 1 && read.getCigar().getCigarElement(0).getOperator().equals(CigarOperator.INSERTION)) { - return read.getUnclippedEnd(); - } int stop = read.getUnclippedStart(); if (readIsEntirelyInsertion(read)) From 623c49765d7a70cd87e68cee913bd133eaf168aa Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 22 Sep 2011 11:13:40 -0400 Subject: [PATCH 12/68] NO BAQ ON EXOMES! says the boss. --- .../queue/qscripts/MethodsDevelopmentCallingPipeline.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala index 29ee2769b..34bc43ff5 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala @@ -221,7 +221,7 @@ class MethodsDevelopmentCallingPipeline extends QScript { this.max_deletion_fraction = qscript.deletions this.out = t.rawVCF this.glm = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP - this.baq = if (noBAQ) {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF} else {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY} + this.baq = if (noBAQ || t.isExome) {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF} else {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY} this.analysisName = t.name + "_UGs" this.jobName = queueLogDir + t.name + ".snpcall" } From 3fdee2b9ed36072163f7284c3296a2f6d353c5d1 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 11:19:43 -0400 Subject: [PATCH 13/68] Merge from stable into unstable --- .../walkers/variantutils/CombineVariantsIntegrationTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index b65de9d36..0205fe0eb 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -90,7 +90,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "96941ee177b0614a9879af0ac3218963"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a8a6e7589f22e0b6c5d222066b9a2093"); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1c8720fde62687c2e861217670d8b3c"); } @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); } @@ -137,7 +137,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132, 1, - Arrays.asList("")); + Arrays.asList("5969446769cb8377daa2db29304ae6b5")); executeTest("combineDBSNPDuplicateSites:", spec); } } \ No newline at end of file From a05c959e5a9729a023260ffc019dd2df44ce14b8 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 11:20:07 -0400 Subject: [PATCH 14/68] Empty unit tests for VariantContextUtils -- will be expanded over the day --- .../sting/utils/variantcontext/Genotype.java | 19 ++++- .../VariantContextUnitTest.java | 6 +- .../VariantContextUtilsUnitTest.java | 84 +++++++++++++++++++ 3 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 85d752003..577168578 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -25,15 +25,32 @@ public class Genotype { protected boolean isPhased = false; protected boolean filtersWereAppliedToContext; - public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased) { + public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased) { + this(sampleName, alleles, negLog10PError, filters, attributes, isPhased, null); + } + + public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased, double[] log10Likelihoods) { if ( alleles != null ) this.alleles = Collections.unmodifiableList(alleles); + if ( log10Likelihoods != null ) + attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods)); commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes); filtersWereAppliedToContext = filters != null; this.isPhased = isPhased; validate(); } + /** + * Creates a new Genotype for sampleName with genotype according to alleles. + * @param sampleName + * @param alleles + * @param negLog10PError the confidence in these alleles + * @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known + */ + public Genotype(String sampleName, List alleles, double negLog10PError, double[] log10Likelihoods) { + this(sampleName, alleles, negLog10PError, null, null, false, log10Likelihoods); + } + public Genotype(String sampleName, List alleles, double negLog10PError) { this(sampleName, alleles, negLog10PError, null, null, false); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index f8e6da20a..663eb9ef6 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -5,6 +5,7 @@ package org.broadinstitute.sting.utils.variantcontext; // the imports for unit testing. +import org.broadinstitute.sting.BaseTest; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.BeforeTest; @@ -14,10 +15,7 @@ import java.util.Arrays; import java.util.List; -/** - * Basic unit test for RecalData - */ -public class VariantContextUnitTest { +public class VariantContextUnitTest extends BaseTest { Allele A, Aref, T, Tref; Allele del, delRef, ATC, ATCref; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java new file mode 100644 index 000000000..9ca29d41c --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.utils.variantcontext; + + +// the imports for unit testing. + + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; + + +public class VariantContextUtilsUnitTest extends BaseTest { + Allele Aref, T, delRef, ATC; + Genotype snp1, snp2, indel1; + private GenomeLocParser genomeLocParser; + + @BeforeSuite + public void setup() { + final File referenceFile = new File(b37KGReference); + try { + IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile); + genomeLocParser = new GenomeLocParser(seq); + } + catch(FileNotFoundException ex) { + throw new UserException.CouldNotReadInputFile(referenceFile,ex); + } + + // alleles + Aref = Allele.create("A", true); + delRef = Allele.create("-", true); + T = Allele.create("T"); + ATC = Allele.create("ATC"); + + snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); + snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); + indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); + } + + private VariantContext makeVC(String source, List alleles) { + return makeVC(source, alleles, null, null); + } + + private VariantContext makeVC(String source, List alleles, Collection genotypes, Set filters) { + int start = 10; + int stop = alleles.contains(ATC) ? start + 3 : start; + return new VariantContext(source, "1", start, stop, alleles, genotypes, 1.0, filters, null); + } +} From 5e06a456286ca2ac4b24abb7c541fa3cbab102d5 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 22 Sep 2011 11:55:40 -0400 Subject: [PATCH 15/68] Fix the AnalayzeCovariates packaging. --- public/packages/AnalyzeCovariates.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/packages/AnalyzeCovariates.xml b/public/packages/AnalyzeCovariates.xml index a6675a63d..e8d58862a 100644 --- a/public/packages/AnalyzeCovariates.xml +++ b/public/packages/AnalyzeCovariates.xml @@ -6,7 +6,7 @@ - + From ba5f83fee2487f06d3969f8a55b9ee0214986c5f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 12:10:39 -0400 Subject: [PATCH 16/68] start of VariantContextUtils UnitTest -- tests rsID merging --- .../sting/utils/variantcontext/Genotype.java | 8 +- .../VariantContextUtilsUnitTest.java | 124 +++++++++++++++++- 2 files changed, 125 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 577168578..2e233c18e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -25,16 +25,16 @@ public class Genotype { protected boolean isPhased = false; protected boolean filtersWereAppliedToContext; - public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased) { + public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased) { this(sampleName, alleles, negLog10PError, filters, attributes, isPhased, null); } - public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased, double[] log10Likelihoods) { + public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased, double[] log10Likelihoods) { if ( alleles != null ) this.alleles = Collections.unmodifiableList(alleles); - if ( log10Likelihoods != null ) - attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods)); commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes); + if ( log10Likelihoods != null ) + commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods)); filtersWereAppliedToContext = filters != null; this.isPhased = isPhased; validate(); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 9ca29d41c..81007f9ff 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -30,6 +30,7 @@ package org.broadinstitute.sting.utils.variantcontext; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.apache.log4j.Priority; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -39,6 +40,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.Test; +import org.testng.annotations.DataProvider; import java.io.File; import java.io.FileNotFoundException; @@ -47,8 +49,9 @@ import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { Allele Aref, T, delRef, ATC; - Genotype snp1, snp2, indel1; + Genotype ref1, snp1, snp2, indel1, indelref; private GenomeLocParser genomeLocParser; + VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; @BeforeSuite public void setup() { @@ -67,18 +70,133 @@ public class VariantContextUtilsUnitTest extends BaseTest { T = Allele.create("T"); ATC = Allele.create("ATC"); + ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); + indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); + + refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); + snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); + snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); + snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); + snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); + indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); + indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); + indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); } private VariantContext makeVC(String source, List alleles) { return makeVC(source, alleles, null, null); } + private VariantContext makeVC(String source, List alleles, Collection genotypes) { + return makeVC(source, alleles, genotypes, null); + } + private VariantContext makeVC(String source, List alleles, Collection genotypes, Set filters) { int start = 10; - int stop = alleles.contains(ATC) ? start + 3 : start; - return new VariantContext(source, "1", start, stop, alleles, genotypes, 1.0, filters, null); + int stop = start; // alleles.contains(ATC) ? start + 3 : start; + return new VariantContext(source, "1", start, stop, alleles, + VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), + 1.0, filters, null, (byte)'C'); } + + private class SimpleMergeTest extends TestDataProvider { + List inputVCs; + VariantContext expectedVC; + + private SimpleMergeTest(VariantContext... vcsArg) { + super(SimpleMergeTest.class); + LinkedList allVCs = new LinkedList(Arrays.asList(vcsArg)); + expectedVC = allVCs.pollLast(); + inputVCs = allVCs; + } + + public String toString() { + return String.format("SimpleMergeTest vc=%s expected=%s", inputVCs, expectedVC); + } + } + + @DataProvider(name = "simplemergedata") + public Object[][] createSimpleMergeData() { + // first, do no harm + new SimpleMergeTest(refVC, refVC); + new SimpleMergeTest(snpVC1, snpVC1); + new SimpleMergeTest(indelVC1, indelVC1); + new SimpleMergeTest(indelVC3, indelVC3); + + new SimpleMergeTest(refVC, snpVC1, snpVC3); + new SimpleMergeTest(snpVC1, snpVC2, snpVC2); + new SimpleMergeTest(refVC, snpVC2, snpVC4); + + new SimpleMergeTest(indelVC1, indelVC2, indelVC3); + new SimpleMergeTest(indelVC1, indelVC3, indelVC3); + new SimpleMergeTest(indelVC2, indelVC3, indelVC3); + + return SimpleMergeTest.getTests(SimpleMergeTest.class); + } + + private class SimpleMergeRSIDTest extends TestDataProvider { + List inputs; + String expected; + + private SimpleMergeRSIDTest(String... arg) { + super(SimpleMergeRSIDTest.class); + LinkedList allStrings = new LinkedList(Arrays.asList(arg)); + expected = allStrings.pollLast(); + inputs = allStrings; + } + + public String toString() { + return String.format("SimpleMergeRSIDTest vc=%s expected=%s", inputs, expected); + } + } + + @DataProvider(name = "simplemergersiddata") + public Object[][] createSimpleMergeRSIDData() { + new SimpleMergeRSIDTest(".", "."); + new SimpleMergeRSIDTest("rs1", "rs1"); + new SimpleMergeRSIDTest(".", "rs1", "rs1"); + new SimpleMergeRSIDTest("rs1", ".", "rs1"); + new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2"); + new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1"); + new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1"); + new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1"); + new SimpleMergeRSIDTest("rs1", ".", ".", "rs1"); + new SimpleMergeRSIDTest("rs1", "rs2", "rs3", "rs1,rs2,rs3"); + + return SimpleMergeRSIDTest.getTests(SimpleMergeRSIDTest.class); + } + + @Test(dataProvider = "simplemergersiddata") + public void testRSIDMerge(SimpleMergeRSIDTest cfg) { + List inputs = new ArrayList(); + for ( String id : cfg.inputs ) { + MutableVariantContext vc = new MutableVariantContext(snpVC1); + if ( ! id.equals(".") ) vc.setID(id); + inputs.add(vc); + + } + + VariantContext merged = myMerge(inputs); + Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); + } + + private VariantContext myMerge(List inputs) { + List priority = new ArrayList(); + for ( VariantContext vc : inputs ) priority.add(vc.getSource()); + + return VariantContextUtils.simpleMerge(genomeLocParser, + inputs, priority, + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + } + + // todo -- add tests for subset merging, especially with correct PLs + // todo -- test priority list + // todo -- test FilteredRecordMergeType + // todo -- no annotate origin + // todo -- test set key + // todo -- test filtered are uncalled } From 15a410b24b3ed42d6d05698da33f3824cf6f14cf Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 22 Sep 2011 13:15:41 -0400 Subject: [PATCH 17/68] Updating md5 for fixed file --- .../sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java index b4a8498e1..1b2a6e82e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java @@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), 1, - Arrays.asList("6645babc8c7d46be0da223477c7b1291")); + Arrays.asList("3008d6f5044bc14801e5c58d985dec72")); executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec); } } From 9c1728416cf6773e510fc8c8843055189e1b03a4 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 22 Sep 2011 13:16:42 -0400 Subject: [PATCH 18/68] Revert "Updating md5 for fixed file" because this was fixed properly in unstable (but will break SnpEff if put into Stable). This reverts commit 6b4182c6ab3e214da4c73bc6f3687ac6d1c0b72c. --- .../sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java index 1b2a6e82e..b4a8498e1 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java @@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), 1, - Arrays.asList("3008d6f5044bc14801e5c58d985dec72")); + Arrays.asList("6645babc8c7d46be0da223477c7b1291")); executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec); } } From 4e9020c9f7bd6fc429090213ad06c9a6b46ecc70 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 22 Sep 2011 13:28:25 -0400 Subject: [PATCH 19/68] Fixed alignment start for hard clipping insertions --- .../sting/utils/clipreads/ClippingOp.java | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 951ab265c..0c2def1c6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -432,25 +432,37 @@ public class ClippingOp { } private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { - int shift = 0; + int newShift = 0; + int oldShift = 0; + int deletionShift = 0; - // Rewind to previous start (by counting everything that was already clipped in this read) - for (CigarElement cigarElement : oldCigar.getCigarElements()) { - if (!cigarElement.getOperator().consumesReferenceBases()) - shift -= cigarElement.getLength(); - else - break; - } - - // Advance to new start (by counting everything new that has been clipped ) for (CigarElement cigarElement : newCigar.getCigarElements()) { - if (!cigarElement.getOperator().consumesReferenceBases()) - shift += cigarElement.getLength(); + if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) + newShift += cigarElement.getLength(); else break; } - return shift; + for (CigarElement cigarElement : oldCigar.getCigarElements()) { + if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP ) + oldShift += Math.min(cigarElement.getLength(), newShift - oldShift); + else + break; + } + + int basesClipped = 0; + for (CigarElement cigarElement : oldCigar.getCigarElements()) { + if (basesClipped > newShift) // are we beyond the clipped region? + break; + + else if (cigarElement.getOperator() == CigarOperator.DELETION) // if this is a deletion, we have to adjust the starting shift + deletionShift += cigarElement.getLength(); + + else if (cigarElement.getOperator() != CigarOperator.INSERTION) // if it's not an insertion or deletion, than it counts as hard clipped base. + basesClipped += cigarElement.getLength(); + } + + return newShift - oldShift + deletionShift; } private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) { From 80d7300de4acaa172e256e91f02a962d6479c6e0 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 22 Sep 2011 13:28:42 -0400 Subject: [PATCH 20/68] Unit test was passing in FORMAT as one of the sample names. There used to be a hack in the VCFHeader to check for this and remove it and I couldn't figure out why, but now I know. Hack was removed and now the unit test passes in only the sample names as per the contract. --- .../sting/utils/genotype/vcf/VCFWriterUnitTest.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index a8e6593b1..35c6a4993 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -105,7 +105,6 @@ public class VCFWriterUnitTest extends BaseTest { public static VCFHeader createFakeHeader(Set metaData, Set additionalColumns) { metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString())); metaData.add(new VCFHeaderLine("two", "2")); - additionalColumns.add("FORMAT"); additionalColumns.add("extra1"); additionalColumns.add("extra2"); return new VCFHeader(metaData, additionalColumns); @@ -159,6 +158,6 @@ public class VCFWriterUnitTest extends BaseTest { Assert.assertTrue(additionalColumns.contains(key)); index++; } - Assert.assertEquals(index+1, additionalColumns.size() /* for the header field we don't see */); + Assert.assertEquals(index, additionalColumns.size()); } } From 1acf7945c544a923c5024b06a8322d351c889584 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 22 Sep 2011 14:51:14 -0400 Subject: [PATCH 21/68] Fixed hard clipped cigar and alignment start * Hard clipped Cigar now includes all insertions that were hard clipped and not the deletions. * The alignment start is now recalculated according to the new hard clipped cigar representation --- .../sting/utils/clipreads/ClippingOp.java | 6 +++--- .../sting/utils/clipreads/ReadClipper.java | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 0c2def1c6..47ce8165c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -458,7 +458,7 @@ public class ClippingOp { else if (cigarElement.getOperator() == CigarOperator.DELETION) // if this is a deletion, we have to adjust the starting shift deletionShift += cigarElement.getLength(); - else if (cigarElement.getOperator() != CigarOperator.INSERTION) // if it's not an insertion or deletion, than it counts as hard clipped base. + else basesClipped += cigarElement.getLength(); } @@ -474,8 +474,8 @@ public class ClippingOp { return -clippedLength; } - if (cigarElement.getOperator() == CigarOperator.DELETION) - return cigarElement.getLength(); +// if (cigarElement.getOperator() == CigarOperator.DELETION) +// return cigarElement.getLength(); return 0; } diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 9b9ce4fdf..e2ecbe46b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -71,21 +71,21 @@ public class ReadClipper { private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart); - int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); + int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); - if (start < 0 || stop > read.getReadLength() - 1 + numDeletions(read)) + if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); - // TODO add check in the Hardclip function - if ( start > stop ) - stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); - + if ( start > stop ) { +// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); + throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); + } //This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into //an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read - stop -= numDeletions(read); - if ( start > stop ) - start -= numDeletions(read); +// stop -= numDeletions(read); +// if ( start > stop ) +// start -= numDeletions(read); //System.out.println("Clipping start/stop: " + start + "/" + stop); From 68da555932761567f79f0b3bfdbc808dcf72ca5e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 15:16:37 -0400 Subject: [PATCH 22/68] UnitTest for simpleMerge for alleles --- .../VariantContextUtilsUnitTest.java | 176 ++++++++++++------ 1 file changed, 118 insertions(+), 58 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 81007f9ff..01d398c1a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -48,10 +48,8 @@ import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { - Allele Aref, T, delRef, ATC; - Genotype ref1, snp1, snp2, indel1, indelref; + Allele Aref, T, C, delRef, ATC, ATCATC; private GenomeLocParser genomeLocParser; - VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; @BeforeSuite public void setup() { @@ -68,22 +66,9 @@ public class VariantContextUtilsUnitTest extends BaseTest { Aref = Allele.create("A", true); delRef = Allele.create("-", true); T = Allele.create("T"); + C = Allele.create("C"); ATC = Allele.create("ATC"); - - ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); - snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); - snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); - indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); - indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); - - refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); - snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); - snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); - snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); - snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); - indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); - indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); - indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); + ATCATC = Allele.create("ATCATC"); } private VariantContext makeVC(String source, List alleles) { @@ -98,45 +83,124 @@ public class VariantContextUtilsUnitTest extends BaseTest { int start = 10; int stop = start; // alleles.contains(ATC) ? start + 3 : start; return new VariantContext(source, "1", start, stop, alleles, - VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), + genotypes == null ? null : VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), 1.0, filters, null, (byte)'C'); } - private class SimpleMergeTest extends TestDataProvider { - List inputVCs; - VariantContext expectedVC; + // -------------------------------------------------------------------------------- + // + // Test allele merging + // + // -------------------------------------------------------------------------------- - private SimpleMergeTest(VariantContext... vcsArg) { - super(SimpleMergeTest.class); - LinkedList allVCs = new LinkedList(Arrays.asList(vcsArg)); - expectedVC = allVCs.pollLast(); - inputVCs = allVCs; + private class MergeAllelesTest extends TestDataProvider { + List> inputs; + List expected; + + private MergeAllelesTest(List... arg) { + super(MergeAllelesTest.class); + LinkedList> all = new LinkedList>(Arrays.asList(arg)); + expected = all.pollLast(); + inputs = all; } public String toString() { - return String.format("SimpleMergeTest vc=%s expected=%s", inputVCs, expectedVC); + return String.format("MergeAllelesTest input=%s expected=%s", inputs, expected); } } - - @DataProvider(name = "simplemergedata") - public Object[][] createSimpleMergeData() { + @DataProvider(name = "mergeAlleles") + public Object[][] mergeAllelesData() { // first, do no harm - new SimpleMergeTest(refVC, refVC); - new SimpleMergeTest(snpVC1, snpVC1); - new SimpleMergeTest(indelVC1, indelVC1); - new SimpleMergeTest(indelVC3, indelVC3); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref)); - new SimpleMergeTest(refVC, snpVC1, snpVC3); - new SimpleMergeTest(snpVC1, snpVC2, snpVC2); - new SimpleMergeTest(refVC, snpVC2, snpVC4); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref), + Arrays.asList(Aref)); - new SimpleMergeTest(indelVC1, indelVC2, indelVC3); - new SimpleMergeTest(indelVC1, indelVC3, indelVC3); - new SimpleMergeTest(indelVC2, indelVC3, indelVC3); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref, T), + Arrays.asList(Aref, T)); - return SimpleMergeTest.getTests(SimpleMergeTest.class); + new MergeAllelesTest(Arrays.asList(Aref, C), + Arrays.asList(Aref, T), + Arrays.asList(Aref, C, T)); + + new MergeAllelesTest(Arrays.asList(Aref, T), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele + + new MergeAllelesTest(Arrays.asList(Aref, C, T), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); + + new MergeAllelesTest(Arrays.asList(Aref, T, C), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef)); // todo -- FIXME me GdA + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef, ATC), + Arrays.asList(delRef, ATC)); + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + new MergeAllelesTest(Arrays.asList(delRef, ATCATC), + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + new MergeAllelesTest(Arrays.asList(delRef, ATC), + Arrays.asList(delRef, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + return MergeAllelesTest.getTests(MergeAllelesTest.class); } + @Test(dataProvider = "mergeAlleles") + public void testMergeAlleles(MergeAllelesTest cfg) { + final List priority = new ArrayList(); + final List inputs = new ArrayList(); + + int i = 0; + for ( final List alleles : cfg.inputs ) { + final String name = "vcf" + ++i; + priority.add(name); + inputs.add(makeVC(name, alleles)); + } + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + inputs, priority, + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false); + Assert.assertEquals(merged.getAlleles(), cfg.expected); + } + +// VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; +// ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); +// snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); +// snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); +// indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); +// indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); +// +// refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); +// snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); +// snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); +// snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); +// snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); +// indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); +// indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); +// indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); + + // -------------------------------------------------------------------------------- + // + // Test rsID merging + // + // -------------------------------------------------------------------------------- + private class SimpleMergeRSIDTest extends TestDataProvider { List inputs; String expected; @@ -156,10 +220,13 @@ public class VariantContextUtilsUnitTest extends BaseTest { @DataProvider(name = "simplemergersiddata") public Object[][] createSimpleMergeRSIDData() { new SimpleMergeRSIDTest(".", "."); + new SimpleMergeRSIDTest(".", ".", "."); new SimpleMergeRSIDTest("rs1", "rs1"); + new SimpleMergeRSIDTest("rs1", "rs1", "rs1"); new SimpleMergeRSIDTest(".", "rs1", "rs1"); new SimpleMergeRSIDTest("rs1", ".", "rs1"); new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2"); + new SimpleMergeRSIDTest("rs1", "rs2", "rs1", "rs1,rs2"); // duplicates new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1"); @@ -171,32 +238,25 @@ public class VariantContextUtilsUnitTest extends BaseTest { @Test(dataProvider = "simplemergersiddata") public void testRSIDMerge(SimpleMergeRSIDTest cfg) { - List inputs = new ArrayList(); - for ( String id : cfg.inputs ) { + final VariantContext snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T)); + final List inputs = new ArrayList(); + + for ( final String id : cfg.inputs ) { MutableVariantContext vc = new MutableVariantContext(snpVC1); if ( ! id.equals(".") ) vc.setID(id); inputs.add(vc); - } - VariantContext merged = myMerge(inputs); + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + inputs, null, + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false); Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); } - private VariantContext myMerge(List inputs) { - List priority = new ArrayList(); - for ( VariantContext vc : inputs ) priority.add(vc.getSource()); - - return VariantContextUtils.simpleMerge(genomeLocParser, - inputs, priority, - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); - } - // todo -- add tests for subset merging, especially with correct PLs - // todo -- test priority list + // todo -- test priority list: intersection, filtered in all, reference in all, X-filteredInX, X // todo -- test FilteredRecordMergeType // todo -- no annotate origin // todo -- test set key - // todo -- test filtered are uncalled } From 39b54211d079dc2c70a2a9b4a38b9e08880b24df Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 22 Sep 2011 15:46:55 -0400 Subject: [PATCH 24/68] Fixed hard clipping soft clipped bases after hard clips if soft clipped bases were after a hard clipped section of the read, the hard clip was clipping the left soft clip tail as if it were a right tail. Mayhem. --- .../org/broadinstitute/sting/utils/clipreads/ReadClipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index e2ecbe46b..d0e7f8371 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -145,7 +145,7 @@ public class ReadClipper { cutLeft = readIndex + cigarElement.getLength() - 1; } } - else + else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) rightTail = true; if (cigarElement.getOperator().consumesReadBases()) From 46ca33dc047d6a52406142d9dc323185938d7dab Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:04:32 -0400 Subject: [PATCH 27/68] TestDataProvider now can be named --- .../test/org/broadinstitute/sting/BaseTest.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 63faf1ab9..35a81770d 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -132,15 +132,21 @@ public abstract class BaseTest { */ public static class TestDataProvider { private static final Map> tests = new HashMap>(); + private final String name; /** * Create a new TestDataProvider instance bound to the class variable C * @param c */ - public TestDataProvider(Class c) { + public TestDataProvider(Class c, String name) { if ( ! tests.containsKey(c) ) tests.put(c, new ArrayList()); tests.get(c).add(this); + this.name = name; + } + + public TestDataProvider(Class c) { + this(c, ""); } /** @@ -153,6 +159,11 @@ public abstract class BaseTest { for ( Object x : tests.get(c) ) params2.add(new Object[]{x}); return params2.toArray(new Object[][]{}); } + + @Override + public String toString() { + return "TestDataProvider("+name+")"; + } } /** From 5cf82f923615b2dae0dee1b80f043fadacdfbc05 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:05:12 -0400 Subject: [PATCH 28/68] simpleMerge UnitTest tests filtered VC merging --- .../variantcontext/VariantContextUtils.java | 14 +- .../VariantContextUtilsUnitTest.java | 141 ++++++++++++++++-- 2 files changed, 135 insertions(+), 20 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index ca9c71eba..03fe969b5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -44,6 +44,11 @@ import java.io.Serializable; import java.util.*; public class VariantContextUtils { + public final static String MERGE_INTERSECTION = "Intersection"; + public final static String MERGE_FILTER_IN_ALL = "FilteredInAll"; + public final static String MERGE_REF_IN_ALL = "ReferenceInAll"; + public final static String MERGE_FILTER_PREFIX = "filterIn"; + final public static JexlEngine engine = new JexlEngine(); static { engine.setSilent(false); // will throw errors now for selects that don't evaluate properly @@ -609,19 +614,20 @@ public class VariantContextUtils { if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() ) filters.clear(); + if ( annotateOrigin ) { // we care about where the call came from String setValue; if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered - setValue = "Intersection"; + setValue = MERGE_INTERSECTION; else if ( nFiltered == VCs.size() ) // everything was filtered out - setValue = "FilteredInAll"; + setValue = MERGE_FILTER_IN_ALL; else if ( variantSources.isEmpty() ) // everyone was reference - setValue = "ReferenceInAll"; + setValue = MERGE_REF_IN_ALL; else { LinkedHashSet s = new LinkedHashSet(); for ( VariantContext vc : VCs ) if ( vc.isVariant() ) - s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() ); + s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() ); setValue = Utils.join("-", s); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 01d398c1a..ff26d7245 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -41,6 +41,7 @@ import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.Test; import org.testng.annotations.DataProvider; +import org.yaml.snakeyaml.Yaml; import java.io.File; import java.io.FileNotFoundException; @@ -75,6 +76,14 @@ public class VariantContextUtilsUnitTest extends BaseTest { return makeVC(source, alleles, null, null); } + private VariantContext makeVC(String source, List alleles, String filter) { + return makeVC(source, alleles, filter.equals(".") ? null : new HashSet(Arrays.asList(filter))); + } + + private VariantContext makeVC(String source, List alleles, Set filters) { + return makeVC(source, alleles, null, filters); + } + private VariantContext makeVC(String source, List alleles, Collection genotypes) { return makeVC(source, alleles, genotypes, null); } @@ -179,22 +188,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getAlleles(), cfg.expected); } -// VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; -// ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); -// snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); -// snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); -// indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); -// indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); -// -// refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); -// snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); -// snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); -// snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); -// snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); -// indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); -// indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); -// indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); - // -------------------------------------------------------------------------------- // // Test rsID merging @@ -254,6 +247,122 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); } + // -------------------------------------------------------------------------------- + // + // Test filtered merging + // + // -------------------------------------------------------------------------------- + + private class MergeFilteredTest extends TestDataProvider { + List inputs; + VariantContext expected; + String setExpected; + VariantContextUtils.FilteredRecordMergeType type; + + + private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, String setExpected) { + this(name, input1, input2, expected, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected); + } + + private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, VariantContextUtils.FilteredRecordMergeType type, String setExpected) { + super(MergeFilteredTest.class, name); + LinkedList all = new LinkedList(Arrays.asList(input1, input2)); + this.expected = expected; + this.type = type; + inputs = all; + this.setExpected = setExpected; + } + + public String toString() { + return String.format("%s input=%s expected=%s", super.toString(), inputs, expected); + } + } + + @DataProvider(name = "mergeFiltered") + public Object[][] mergeFilteredData() { + new MergeFilteredTest("AllPass", + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); + + new MergeFilteredTest("noFilters", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.MERGE_INTERSECTION); + + new MergeFilteredTest("oneFiltered", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + new MergeFilteredTest("onePassOneFail", + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + new MergeFilteredTest("FailOneUnfiltered", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + new MergeFilteredTest("AllFiltered", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.MERGE_FILTER_IN_ALL); + + // test ALL vs. ANY + new MergeFilteredTest("OneFailAllUnfilteredArg", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + // test excluding allele in filtered record + new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + // promotion of site from unfiltered to PASSES + new MergeFilteredTest("UnfilteredPlusPassIsPass", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); + + return MergeFilteredTest.getTests(MergeFilteredTest.class); + } + + @Test(dataProvider = "mergeFiltered") + public void testMergeFiltered(MergeFilteredTest cfg) { + final List priority = new ArrayList(); + + for ( final VariantContext vc : cfg.inputs ) { + priority.add(vc.getSource()); + } + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + + // test alleles are equal + Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); + + // test set field + Assert.assertEquals(merged.getAttribute("set"), cfg.setExpected); + + // test filter field + Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters()); + } + + // todo -- add tests for subset merging, especially with correct PLs // todo -- test priority list: intersection, filtered in all, reference in all, X-filteredInX, X // todo -- test FilteredRecordMergeType From 30ab3af0c88d4d22a6d894c8d5e45863b4957445 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:14:59 -0400 Subject: [PATCH 29/68] A few more simpleMerge UnitTest tests for filtered vcs --- .../VariantContextUtilsUnitTest.java | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index ff26d7245..070fdaca4 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -304,12 +304,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); - new MergeFilteredTest("FailOneUnfiltered", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "."), - String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); - new MergeFilteredTest("AllFiltered", makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "FAIL"), @@ -317,6 +311,13 @@ public class VariantContextUtilsUnitTest extends BaseTest { VariantContextUtils.MERGE_FILTER_IN_ALL); // test ALL vs. ANY + new MergeFilteredTest("FailOneUnfiltered", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + new MergeFilteredTest("OneFailAllUnfilteredArg", makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "."), @@ -338,6 +339,18 @@ public class VariantContextUtilsUnitTest extends BaseTest { makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), VariantContextUtils.MERGE_INTERSECTION); + new MergeFilteredTest("RefInAll", + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_REF_IN_ALL); + + new MergeFilteredTest("RefInOne", + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + "2"); + return MergeFilteredTest.getTests(MergeFilteredTest.class); } From dab7232e9af623a3d085988bac751ee4df229748 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:26:11 -0400 Subject: [PATCH 30/68] simpleMerge UnitTest for not annotating and annotating to different info key --- .../VariantContextUtilsUnitTest.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 070fdaca4..77e4391a3 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -375,10 +375,26 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters()); } + @Test + public void testAnnotationSet() { + for ( final boolean annotate : Arrays.asList(true, false)) { + for ( final String set : Arrays.asList("set", "combine", "x")) { + final List priority = Arrays.asList("1", "2"); + VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); + VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + Arrays.asList(vc1, vc2), priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false); + + if ( annotate ) + Assert.assertEquals(merged.getAttribute(set), VariantContextUtils.MERGE_INTERSECTION); + else + Assert.assertFalse(merged.hasAttribute(set)); + } + } + } // todo -- add tests for subset merging, especially with correct PLs - // todo -- test priority list: intersection, filtered in all, reference in all, X-filteredInX, X - // todo -- test FilteredRecordMergeType - // todo -- no annotate origin - // todo -- test set key + // todo -- test priority list } From a8e0fb26ea309a6620d540310f1224fb08ec7440 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 23 Sep 2011 07:33:20 -0400 Subject: [PATCH 31/68] Updating md5 because the file changed --- .../sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java index b4a8498e1..1b2a6e82e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java @@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), 1, - Arrays.asList("6645babc8c7d46be0da223477c7b1291")); + Arrays.asList("3008d6f5044bc14801e5c58d985dec72")); executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec); } } From 4397ce8653eab67546e261faef5ebd5619057b79 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 08:23:46 -0400 Subject: [PATCH 33/68] Moved removePLs to VariantContextUtils --- .../sting/utils/variantcontext/Genotype.java | 7 ------- .../sting/utils/variantcontext/VariantContextUtils.java | 9 ++++++++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 2e233c18e..fd8c70abe 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -74,13 +74,6 @@ public class Genotype { return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased()); } - public static Genotype removePLs(Genotype g) { - Map attrs = new HashMap(g.getAttributes()); - attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); - attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); - return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased()); - } - public static Genotype modifyAlleles(Genotype g, List alleles) { return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 03fe969b5..93d4f6f5c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -159,6 +159,13 @@ public class VariantContextUtils { return "%." + precision + "f"; } + public static Genotype removePLs(Genotype g) { + Map attrs = new HashMap(g.getAttributes()); + attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); + attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); + return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased()); + } + /** * A simple but common wrapper for matching VariantContext objects using JEXL expressions */ @@ -740,7 +747,7 @@ public class VariantContextUtils { Map newGs = new HashMap(genotypes.size()); for ( Map.Entry g : genotypes.entrySet() ) { - newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? Genotype.removePLs(g.getValue()) : g.getValue()); + newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? removePLs(g.getValue()) : g.getValue()); } return newGs; From a9f073fa68f500fbad4e068b75e3ff924a0d7d66 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 08:24:49 -0400 Subject: [PATCH 34/68] Genotype merging unit tests for simpleMerge -- Remaining TODOs are all for GdA --- .../VariantContextUtilsUnitTest.java | 319 +++++++++++++----- 1 file changed, 234 insertions(+), 85 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 77e4391a3..c1025a7de 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -21,14 +21,8 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - -// our package package org.broadinstitute.sting.utils.variantcontext; - -// the imports for unit testing. - - import net.sf.picard.reference.IndexedFastaSequenceFile; import org.apache.log4j.Priority; import org.broadinstitute.sting.BaseTest; @@ -47,7 +41,6 @@ import java.io.File; import java.io.FileNotFoundException; import java.util.*; - public class VariantContextUtilsUnitTest extends BaseTest { Allele Aref, T, C, delRef, ATC, ATCATC; private GenomeLocParser genomeLocParser; @@ -72,10 +65,22 @@ public class VariantContextUtilsUnitTest extends BaseTest { ATCATC = Allele.create("ATCATC"); } + private Genotype makeG(String sample, Allele a1, Allele a2) { + return new Genotype(sample, Arrays.asList(a1, a2)); + } + + private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) { + return new Genotype(sample, Arrays.asList(a1, a2), log10pError); + } + private VariantContext makeVC(String source, List alleles) { return makeVC(source, alleles, null, null); } + private VariantContext makeVC(String source, List alleles, Genotype... g1) { + return makeVC(source, alleles, Arrays.asList(g1)); + } + private VariantContext makeVC(String source, List alleles, String filter) { return makeVC(source, alleles, filter.equals(".") ? null : new HashSet(Arrays.asList(filter))); } @@ -121,66 +126,66 @@ public class VariantContextUtilsUnitTest extends BaseTest { public Object[][] mergeAllelesData() { // first, do no harm new MergeAllelesTest(Arrays.asList(Aref), - Arrays.asList(Aref)); + Arrays.asList(Aref)); new MergeAllelesTest(Arrays.asList(Aref), - Arrays.asList(Aref), - Arrays.asList(Aref)); + Arrays.asList(Aref), + Arrays.asList(Aref)); new MergeAllelesTest(Arrays.asList(Aref), - Arrays.asList(Aref, T), - Arrays.asList(Aref, T)); + Arrays.asList(Aref, T), + Arrays.asList(Aref, T)); new MergeAllelesTest(Arrays.asList(Aref, C), - Arrays.asList(Aref, T), - Arrays.asList(Aref, C, T)); + Arrays.asList(Aref, T), + Arrays.asList(Aref, C, T)); new MergeAllelesTest(Arrays.asList(Aref, T), - Arrays.asList(Aref, C), - Arrays.asList(Aref, C, T)); // sorted by allele + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele new MergeAllelesTest(Arrays.asList(Aref, C, T), - Arrays.asList(Aref, C), - Arrays.asList(Aref, C, T)); + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); new MergeAllelesTest(Arrays.asList(Aref, T, C), - Arrays.asList(Aref, C), - Arrays.asList(Aref, C, T)); // sorted by allele + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef)); // todo -- FIXME me GdA + Arrays.asList(delRef)); // todo -- FIXME me GdA new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATC)); + Arrays.asList(delRef, ATC), + Arrays.asList(delRef, ATC)); new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); new MergeAllelesTest(Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); new MergeAllelesTest(Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + Arrays.asList(delRef, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); return MergeAllelesTest.getTests(MergeAllelesTest.class); } @Test(dataProvider = "mergeAlleles") public void testMergeAlleles(MergeAllelesTest cfg) { - final List priority = new ArrayList(); final List inputs = new ArrayList(); int i = 0; for ( final List alleles : cfg.inputs ) { final String name = "vcf" + ++i; - priority.add(name); inputs.add(makeVC(name, alleles)); } + final List priority = vcs2priority(inputs); + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, inputs, priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, @@ -281,87 +286,82 @@ public class VariantContextUtilsUnitTest extends BaseTest { @DataProvider(name = "mergeFiltered") public Object[][] mergeFilteredData() { new MergeFilteredTest("AllPass", - makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_INTERSECTION); + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("noFilters", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "."), - VariantContextUtils.MERGE_INTERSECTION); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("oneFiltered", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), "."), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("onePassOneFail", - makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("AllFiltered", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), "FAIL"), - VariantContextUtils.MERGE_FILTER_IN_ALL); + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.MERGE_FILTER_IN_ALL); // test ALL vs. ANY new MergeFilteredTest("FailOneUnfiltered", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "."), - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("OneFailAllUnfilteredArg", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "FAIL"), - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, - String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); // test excluding allele in filtered record new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), "."), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); // promotion of site from unfiltered to PASSES new MergeFilteredTest("UnfilteredPlusPassIsPass", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_INTERSECTION); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("RefInAll", - makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_REF_IN_ALL); + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_REF_IN_ALL); new MergeFilteredTest("RefInOne", - makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - "2"); + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + "2"); return MergeFilteredTest.getTests(MergeFilteredTest.class); } @Test(dataProvider = "mergeFiltered") public void testMergeFiltered(MergeFilteredTest cfg) { - final List priority = new ArrayList(); - - for ( final VariantContext vc : cfg.inputs ) { - priority.add(vc.getSource()); - } - + final List priority = vcs2priority(cfg.inputs); final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); @@ -375,6 +375,148 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters()); } + // -------------------------------------------------------------------------------- + // + // Test genotype merging + // + // -------------------------------------------------------------------------------- + + private class MergeGenotypesTest extends TestDataProvider { + List inputs; + VariantContext expected; + List priority; + + private MergeGenotypesTest(String name, String priority, VariantContext... arg) { + super(MergeGenotypesTest.class, name); + LinkedList all = new LinkedList(Arrays.asList(arg)); + this.expected = all.pollLast(); + inputs = all; + this.priority = Arrays.asList(priority.split(",")); + } + + public String toString() { + return String.format("%s input=%s expected=%s", super.toString(), inputs, expected); + } + } + + @DataProvider(name = "mergeGenotypes") + public Object[][] mergeGenotypesData() { + new MergeGenotypesTest("TakeGenotypeByPriority-1,2", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1))); + + new MergeGenotypesTest("TakeGenotypeByPriority-1,2-nocall", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1))); + + new MergeGenotypesTest("TakeGenotypeByPriority-2,1", "2,1", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2))); + + new MergeGenotypesTest("NonOverlappingGenotypes", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, T, 2))); + + new MergeGenotypesTest("PreserveNoCall", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1), makeG("s2", Aref, T, 2))); + + new MergeGenotypesTest("PerserveAlleles", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, C), makeG("s2", Aref, C, 2)), + makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2))); + + new MergeGenotypesTest("TakeGenotypePartialOverlap-1,2", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s3", Aref, T, 3))); + + new MergeGenotypesTest("TakeGenotypePartialOverlap-2,1", "2,1", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3))); + + // todo -- GDA -- add tests for merging correct PLs + + return MergeGenotypesTest.getTests(MergeGenotypesTest.class); + } + + // todo -- add test for GenotypeMergeType UNIQUIFY, REQUIRE_UNIQUE + + @Test(dataProvider = "mergeGenotypes") + public void testMergeGenotypes(MergeGenotypesTest cfg) { + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + + // test alleles are equal + Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); + + // test genotypes + assertGenotypesAreMostlyEqual(merged.getGenotypes(), cfg.expected.getGenotypes()); + } + + // necessary to not overload equals for genotypes + private void assertGenotypesAreMostlyEqual(Map actual, Map expected) { + if (actual == expected) { + return; + } + + if (actual == null || expected == null) { + Assert.fail("Maps not equal: expected: " + expected + " and actual: " + actual); + } + + if (actual.size() != expected.size()) { + Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size()); + } + + for (Map.Entry entry : actual.entrySet()) { + String key = entry.getKey(); + Genotype value = entry.getValue(); + Genotype expectedValue = expected.get(key); + + Assert.assertEquals(value.alleles, expectedValue.alleles); + Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError()); + Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods()); + if ( value.hasLikelihoods() ) + Assert.assertEquals(value.getLikelihoods(), expectedValue.getLikelihoods()); + } + } + + @Test + public void testMergeGenotypesUniquify() { + final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)); + final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)); + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false); + + // test genotypes + Assert.assertEquals(merged.getGenotypes().keySet(), new HashSet(Arrays.asList("s1.1", "s1.2"))); + } + + @Test(expectedExceptions = UserException.class) + public void testMergeGenotypesRequireUnique() { + final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)); + final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)); + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false); + } + + // -------------------------------------------------------------------------------- + // + // Misc. tests + // + // -------------------------------------------------------------------------------- + @Test public void testAnnotationSet() { for ( final boolean annotate : Arrays.asList(true, false)) { @@ -395,6 +537,13 @@ public class VariantContextUtilsUnitTest extends BaseTest { } } - // todo -- add tests for subset merging, especially with correct PLs - // todo -- test priority list + private static final List vcs2priority(final Collection vcs) { + final List priority = new ArrayList(); + + for ( final VariantContext vc : vcs ) { + priority.add(vc.getSource()); + } + + return priority; + } } From 106a26c42da8780cd33d7186ec0a658830ad6819 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 08:25:20 -0400 Subject: [PATCH 35/68] Minor file cleanup --- .../utils/variantcontext/VariantContextUtilsUnitTest.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index c1025a7de..15a5549b2 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -24,18 +24,14 @@ package org.broadinstitute.sting.utils.variantcontext; import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.apache.log4j.Priority; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; import org.testng.annotations.BeforeSuite; -import org.testng.annotations.Test; import org.testng.annotations.DataProvider; -import org.yaml.snakeyaml.Yaml; +import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; @@ -446,8 +442,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { return MergeGenotypesTest.getTests(MergeGenotypesTest.class); } - // todo -- add test for GenotypeMergeType UNIQUIFY, REQUIRE_UNIQUE - @Test(dataProvider = "mergeGenotypes") public void testMergeGenotypes(MergeGenotypesTest cfg) { final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, From dfce301bebd62275ad55d326dd2801cbbce47d81 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 09:03:04 -0400 Subject: [PATCH 36/68] Looks for @Hidden annotation on all classes and excludes them from the docs --- .../sting/utils/help/GenericDocumentationHandler.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index 4f1e95499..7ea77a939 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -66,7 +66,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { public boolean includeInDocs(ClassDoc doc) { try { Class type = HelpUtils.getClassForDoc(doc); - return JVMUtils.isConcrete(type); + boolean hidden = ! getDoclet().showHiddenFeatures() && type.isAnnotationPresent(Hidden.class); + return ! hidden && JVMUtils.isConcrete(type); } catch ( ClassNotFoundException e ) { return false; } From dd65ba5baee8df3b57819e684dd3b63843809836 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 09:03:37 -0400 Subject: [PATCH 37/68] @Hidden for DocumentationTest and GATKDocsExample --- .../org/broadinstitute/sting/gatk/examples/GATKDocsExample.java | 2 ++ .../broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java | 1 + 2 files changed, 3 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java index 4541a0537..db4f477c2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.examples; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -59,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; * @author Your Name * @since Date created */ +@Hidden public class GATKDocsExample extends RodWalker { /** * Put detailed documentation about the argument here. No need to duplicate the summary information diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java index 933e24784..5b60a9db5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java @@ -42,6 +42,7 @@ import java.util.*; * *

Body test

*/ +@Hidden public class DocumentationTest extends RodWalker { // the docs for the arguments are in the collection @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); From 2bb77a7978881db4ff1d0b57fa7e2afd9a271295 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 09:04:16 -0400 Subject: [PATCH 39/68] Docs for all VariantAnnotator annotations --- .../gatk/walkers/annotator/AlleleBalance.java | 3 + .../annotator/AlleleBalanceBySample.java | 3 + .../walkers/annotator/AnnotationByDepth.java | 5 +- .../gatk/walkers/annotator/BaseCounts.java | 3 + .../annotator/BaseQualityRankSumTest.java | 3 + .../walkers/annotator/ChromosomeCounts.java | 5 + .../walkers/annotator/DepthOfCoverage.java | 7 +- .../annotator/DepthPerAlleleBySample.java | 19 + .../gatk/walkers/annotator/FisherStrand.java | 5 + .../gatk/walkers/annotator/GCContent.java | 3 + .../walkers/annotator/HaplotypeScore.java | 4 + .../gatk/walkers/annotator/HardyWeinberg.java | 3 + .../walkers/annotator/HomopolymerRun.java | 4 +- .../{GLstats.java => InbreedingCoeff.java} | 15 +- .../gatk/walkers/annotator/IndelType.java | 6 +- .../sting/gatk/walkers/annotator/LowMQ.java | 3 + .../annotator/MappingQualityRankSumTest.java | 3 + .../walkers/annotator/MappingQualityZero.java | 3 + .../annotator/MappingQualityZeroBySample.java | 166 ++++--- .../annotator/MappingQualityZeroFraction.java | 5 +- .../gatk/walkers/annotator/NBaseCount.java | 5 +- .../gatk/walkers/annotator/QualByDepth.java | 7 +- .../walkers/annotator/RMSMappingQuality.java | 3 + .../gatk/walkers/annotator/RankSumTest.java | 4 +- .../ReadDepthAndAllelicFractionBySample.java | 416 +++++++++--------- .../walkers/annotator/ReadPosRankSumTest.java | 5 +- .../gatk/walkers/annotator/SBByDepth.java | 5 +- .../gatk/walkers/annotator/SampleList.java | 4 +- .../walkers/annotator/SpanningDeletions.java | 3 + .../annotator/TechnologyComposition.java | 8 +- 30 files changed, 398 insertions(+), 330 deletions(-) rename public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/{GLstats.java => InbreedingCoeff.java} (90%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index cf68a9121..e501258c5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -43,6 +43,9 @@ import java.util.List; import java.util.Map; +/** + * The allele balance (fraction of ref bases over ref + alt bases) across all bialleleic het-called samples + */ public class AlleleBalance extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index ddb7ab828..75c4037d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -16,6 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * The allele balance (fraction of ref bases over ref + alt bases) separately for each bialleleic het-called sample + */ public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java index dc41dbc81..353fd1c2c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java @@ -6,8 +6,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype; import java.util.Map; - - +/** + * Abstract base class for all annotations that are normalized by depth + */ public abstract class AnnotationByDepth extends InfoFieldAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index ecfd9b707..46aa6d0f3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -47,6 +47,9 @@ import java.util.List; import java.util.Map; +/** + * Count of A, C, G, T bases across all samples + */ public class BaseCounts extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index 2a5c996f7..6cab6d95f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -13,6 +13,9 @@ import java.util.LinkedHashMap; import java.util.List; +/** + * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele) + */ public class BaseQualityRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("BaseQRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index ad06dcf52..5ed2a6761 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -44,6 +44,11 @@ import java.util.List; import java.util.Map; +/** + * Allele count in genotypes, for each ALT allele, in the same order as listed; + * allele Frequency, for each ALT allele, in the same order as listed; total number + * of alleles in called genotypes. + */ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation { private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index a4d8db5bd..8826de232 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -16,7 +16,12 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * Total (unfiltered) depth over all samples. + * + * Affected by downsampling (-dcov) though, so the max value one can obtain for N samples with -dcov D + * is N * D + */ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 1652c8de7..1cd30c51d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -23,6 +23,25 @@ import java.util.List; import java.util.Map; +/** + * The depth of coverage of each VCF allele in this sample + * + * Complementary fields that two important ways of thinking about the depth of the data for this sample + * at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal + * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site. + * The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the + * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the + * power I have to determine the genotype of the sample at this site, while the AD tells me how many times + * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering + * the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like + * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would + * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that + * the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that + * are actually present and correctly left-aligned in the alignments themselves). Because of this fact and + * because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base + * assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what + * determine the genotype calls (see below). + */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { private static String REF_ALLELE = "REF"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 0cfca48fa..393eb549c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -43,6 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation + * being seen on only the forward or only the reverse strand) in the reads? More bias is + * indicative of false positive calls. + */ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation { private static final String FS = "FS"; private static final double MIN_PVALUE = 1E-320; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index a46473f60..11a64b49f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -17,6 +17,9 @@ import java.util.List; import java.util.Map; +/** + * The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site + */ public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 9af3b8e8e..df6da3b85 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -49,6 +49,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * Consistency of the site with two (and only two) segregating haplotypes. Higher scores + * are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls. + */ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation { private final static boolean DEBUG = false; private final static int MIN_CONTEXT_WING_SIZE = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index 045505698..f068ed895 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -19,6 +19,9 @@ import java.util.List; import java.util.Map; +/** + * Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium + */ public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation { private static final int MIN_SAMPLES = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 463f7a645..197a00243 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -16,7 +16,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * Largest contiguous homopolymer run of the variant allele in either direction on the reference. + */ public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation { private boolean ANNOTATE_INDELS = true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java similarity index 90% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index 5295d6d21..a14007147 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -17,14 +17,15 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 5/16/11 - */ -// A set of annotations calculated directly from the GLs -public class GLstats extends InfoFieldAnnotation implements StandardAnnotation { +/** + * Likelihood-based (using PL field) test for the inbreeding among samples. + * + * A continuous generalization of the Hardy-Weinberg test for disequilibrium that works + * well with limited coverage per sample. See the 1000 Genomes Phase I release for + * more information. + */ +public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation { private static final int MIN_SAMPLES = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index bfede40d2..e0abfcf3c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -14,11 +14,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; /** - * Created by IntelliJ IDEA. - * User: delangel - * Date: Mar 11, 2011 - * Time: 11:47:33 AM - * To change this template use File | Settings | File Templates. + * Rough category of indel type (insertion, deletion, multi-allelic, other) */ public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 09ffe0fb6..753740258 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -17,6 +17,9 @@ import java.util.List; import java.util.Map; +/** + * Triplet annotation: fraction of MAQP == 0, MAPQ < 10, and count of all mapped reads + */ public class LowMQ extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index cc62580a9..157c615d7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -14,6 +14,9 @@ import java.util.LinkedHashMap; import java.util.List; +/** + * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele) + */ public class MappingQualityRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("MQRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index f9caae227..3a3efc4e8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -19,6 +19,9 @@ import java.util.List; import java.util.Map; +/** + * Total count across all samples of mapping quality zero reads + */ public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java index 3d234a1e3..f14d7a8a5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java @@ -1,85 +1,81 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Created by IntelliJ IDEA. - * User: asivache - * Date: Feb 4, 2011 - * Time: 6:46:25 PM - * To change this template use File | Settings | File Templates. - */ -public class MappingQualityZeroBySample extends GenotypeAnnotation { - public Map annotate(RefMetaDataTracker tracker, - AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) { - if ( g == null || !g.isCalled() ) - return null; - - int mq0 = 0; - ReadBackedPileup pileup = null; - if (vc.isIndel() && context.hasExtendedEventPileup()) - pileup = context.getExtendedEventPileup(); - else if (context.hasBasePileup()) - pileup = context.getBasePileup(); - else return null; - - if (pileup != null) { - for (PileupElement p : pileup ) { - if ( p.getMappingQual() == 0 ) - mq0++; - } - } - Map map = new HashMap(); - map.put(getKeyNames().get(0), String.format("%d", mq0)); - return map; - } - - public List getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); } - - public List getDescriptions() { return Arrays.asList( - new VCFFormatHeaderLine(getKeyNames().get(0), 1, - VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); } - - -} +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Count for each sample of mapping quality zero reads + */ +public class MappingQualityZeroBySample extends GenotypeAnnotation { + public Map annotate(RefMetaDataTracker tracker, + AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) { + if ( g == null || !g.isCalled() ) + return null; + + int mq0 = 0; + ReadBackedPileup pileup = null; + if (vc.isIndel() && context.hasExtendedEventPileup()) + pileup = context.getExtendedEventPileup(); + else if (context.hasBasePileup()) + pileup = context.getBasePileup(); + else return null; + + if (pileup != null) { + for (PileupElement p : pileup ) { + if ( p.getMappingQual() == 0 ) + mq0++; + } + } + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%d", mq0)); + return map; + } + + public List getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); } + + public List getDescriptions() { return Arrays.asList( + new VCFFormatHeaderLine(getKeyNames().get(0), 1, + VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); } + + +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index 3e8fe8998..2164537b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -17,8 +17,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - - +/** + * Fraction of all reads across samples that have mapping quality zero + */ public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index 74c562045..891e9ae56 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -17,11 +17,8 @@ import java.util.List; import java.util.Map; /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 5/16/11 + * The number of N bases, counting only SOLiD data */ - public class NBaseCount extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 9a292c39a..552289309 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -15,7 +16,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth. + * + * Low scores are indicative of false positive calls and artifacts. + */ public class QualByDepth extends AnnotationByDepth implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 668129888..40f6d20d3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -21,6 +21,9 @@ import java.util.List; import java.util.Map; +/** + * Root Mean Square of the mapping quality of the reads across all samples. + */ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 52c704055..93e093248 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -21,7 +21,9 @@ import java.util.List; import java.util.Map; - +/** + * Abstract root for all RankSum based annotations + */ public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation { static final double INDEL_LIKELIHOOD_THRESH = 0.1; static final boolean DEBUG = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java index 26ca08380..772541eb6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java @@ -1,209 +1,207 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Created by IntelliJ IDEA. - * User: asivache - * Date: Feb 4, 2011 - * Time: 3:59:27 PM - * To change this template use File | Settings | File Templates. - */ -public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { - - private static String REF_ALLELE = "REF"; - - private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time - - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, - AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { - if ( g == null || !g.isCalled() ) - return null; - - if ( vc.isSNP() ) - return annotateSNP(stratifiedContext, vc); - if ( vc.isIndel() ) - return annotateIndel(stratifiedContext, vc); - - return null; - } - - private Map annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) { - - if ( ! stratifiedContext.hasBasePileup() ) return null; - - HashMap alleleCounts = new HashMap(); - for ( Allele allele : vc.getAlternateAlleles() ) - alleleCounts.put(allele.getBases()[0], 0); - - ReadBackedPileup pileup = stratifiedContext.getBasePileup(); - int totalDepth = pileup.size(); - - Map map = new HashMap(); - map.put(getKeyNames().get(0), totalDepth); // put total depth in right away - - if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!! - - int mq0 = 0; // number of "ref" reads that are acually mq0 - for ( PileupElement p : pileup ) { - if ( p.getMappingQual() == 0 ) { - mq0++; - continue; - } - if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt - alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1); - } - - if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do - - // we need to add counts in the correct order - String[] fracs = new String[alleleCounts.size()]; - for (int i = 0; i < vc.getAlternateAlleles().size(); i++) { - fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0)); - } - - map.put(getKeyNames().get(1), fracs); - return map; - } - - private Map annotateIndel(AlignmentContext - stratifiedContext, VariantContext - vc) { - - if ( ! stratifiedContext.hasExtendedEventPileup() ) { - return null; - } - - ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup(); - if ( pileup == null ) - return null; - int totalDepth = pileup.size(); - - Map map = new HashMap(); - map.put(getKeyNames().get(0), totalDepth); // put total depth in right away - - if ( totalDepth == 0 ) return map; - int mq0 = 0; // number of "ref" reads that are acually mq0 - - HashMap alleleCounts = new HashMap(); - Allele refAllele = vc.getReference(); - - for ( Allele allele : vc.getAlternateAlleles() ) { - - if ( allele.isNoCall() ) { - continue; // this does not look so good, should we die??? - } - - alleleCounts.put(getAlleleRepresentation(allele), 0); - } - - for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) { - - if ( e.getMappingQual() == 0 ) { - mq0++; - continue; - } - - if ( e.isInsertion() ) { - - final String b = e.getEventBases(); - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); - } - - } else { - if ( e.isDeletion() ) { - if ( e.getEventLength() == refAllele.length() ) { - // this is indeed the deletion allele recorded in VC - final String b = DEL; - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); - } - } -// else { -// System.out.print(" deletion of WRONG length found"); -// } - } - } - } - - if ( mq0 == totalDepth ) return map; - - String[] fracs = new String[alleleCounts.size()]; - for (int i = 0; i < vc.getAlternateAlleles().size(); i++) - fracs[i] = String.format("%.3f", - ((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0)); - - map.put(getKeyNames().get(1), fracs); - - //map.put(getKeyNames().get(0), counts); - return map; - } - - private String getAlleleRepresentation(Allele allele) { - if ( allele.isNull() ) { // deletion wrt the ref - return DEL; - } else { // insertion, pass actual bases - return allele.getBaseString(); - } - - } - - // public String getIndelBases() - public List getKeyNames() { return Arrays.asList("DP","FA"); } - - public List getDescriptions() { - return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), - 1, - VCFHeaderLineType.Integer, - "Total read depth per sample, including MQ0"), - new VCFFormatHeaderLine(getKeyNames().get(1), - VCFHeaderLineCount.UNBOUNDED, - VCFHeaderLineType.Float, - "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample")); - } -} +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Unsupported + */ +@Hidden +public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { + + private static String REF_ALLELE = "REF"; + + private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time + + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, + AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { + if ( g == null || !g.isCalled() ) + return null; + + if ( vc.isSNP() ) + return annotateSNP(stratifiedContext, vc); + if ( vc.isIndel() ) + return annotateIndel(stratifiedContext, vc); + + return null; + } + + private Map annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) { + + if ( ! stratifiedContext.hasBasePileup() ) return null; + + HashMap alleleCounts = new HashMap(); + for ( Allele allele : vc.getAlternateAlleles() ) + alleleCounts.put(allele.getBases()[0], 0); + + ReadBackedPileup pileup = stratifiedContext.getBasePileup(); + int totalDepth = pileup.size(); + + Map map = new HashMap(); + map.put(getKeyNames().get(0), totalDepth); // put total depth in right away + + if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!! + + int mq0 = 0; // number of "ref" reads that are acually mq0 + for ( PileupElement p : pileup ) { + if ( p.getMappingQual() == 0 ) { + mq0++; + continue; + } + if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt + alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1); + } + + if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do + + // we need to add counts in the correct order + String[] fracs = new String[alleleCounts.size()]; + for (int i = 0; i < vc.getAlternateAlleles().size(); i++) { + fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0)); + } + + map.put(getKeyNames().get(1), fracs); + return map; + } + + private Map annotateIndel(AlignmentContext + stratifiedContext, VariantContext + vc) { + + if ( ! stratifiedContext.hasExtendedEventPileup() ) { + return null; + } + + ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup(); + if ( pileup == null ) + return null; + int totalDepth = pileup.size(); + + Map map = new HashMap(); + map.put(getKeyNames().get(0), totalDepth); // put total depth in right away + + if ( totalDepth == 0 ) return map; + int mq0 = 0; // number of "ref" reads that are acually mq0 + + HashMap alleleCounts = new HashMap(); + Allele refAllele = vc.getReference(); + + for ( Allele allele : vc.getAlternateAlleles() ) { + + if ( allele.isNoCall() ) { + continue; // this does not look so good, should we die??? + } + + alleleCounts.put(getAlleleRepresentation(allele), 0); + } + + for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) { + + if ( e.getMappingQual() == 0 ) { + mq0++; + continue; + } + + if ( e.isInsertion() ) { + + final String b = e.getEventBases(); + if ( alleleCounts.containsKey(b) ) { + alleleCounts.put(b, alleleCounts.get(b)+1); + } + + } else { + if ( e.isDeletion() ) { + if ( e.getEventLength() == refAllele.length() ) { + // this is indeed the deletion allele recorded in VC + final String b = DEL; + if ( alleleCounts.containsKey(b) ) { + alleleCounts.put(b, alleleCounts.get(b)+1); + } + } +// else { +// System.out.print(" deletion of WRONG length found"); +// } + } + } + } + + if ( mq0 == totalDepth ) return map; + + String[] fracs = new String[alleleCounts.size()]; + for (int i = 0; i < vc.getAlternateAlleles().size(); i++) + fracs[i] = String.format("%.3f", + ((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0)); + + map.put(getKeyNames().get(1), fracs); + + //map.put(getKeyNames().get(0), counts); + return map; + } + + private String getAlleleRepresentation(Allele allele) { + if ( allele.isNull() ) { // deletion wrt the ref + return DEL; + } else { // insertion, pass actual bases + return allele.getBaseString(); + } + + } + + // public String getIndelBases() + public List getKeyNames() { return Arrays.asList("DP","FA"); } + + public List getDescriptions() { + return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), + 1, + VCFHeaderLineType.Integer, + "Total read depth per sample, including MQ0"), + new VCFFormatHeaderLine(getKeyNames().get(1), + VCFHeaderLineCount.UNBOUNDED, + VCFHeaderLineType.Float, + "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample")); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index aabfb2970..27a9306d4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -19,11 +19,8 @@ import java.util.LinkedHashMap; import java.util.List; /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 3/30/11 + * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error). */ - public class ReadPosRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("ReadPosRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java index 180bed24d..efe96f226 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java @@ -15,8 +15,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - - +/** + * SB annotation value by depth of alt containing samples + */ public class SBByDepth extends AnnotationByDepth { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index cd396036f..ff409484d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -41,7 +41,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * List all of the samples in the info field + */ public class SampleList extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index 42203824f..f747fbc2e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -17,6 +17,9 @@ import java.util.List; import java.util.Map; +/** + * Fraction of reads containing spanning deletions at this site. + */ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java index fa48c57a3..1f5508f4c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -19,12 +20,9 @@ import java.util.List; import java.util.Map; /** - * Created by IntelliJ IDEA. - * User: delangel - * Date: 6/29/11 - * Time: 3:14 PM - * To change this template use File | Settings | File Templates. + * Counts of bases from SLX, 454, and SOLiD at this site */ +@Hidden public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation { private String nSLX = "NumSLX"; private String n454 ="Num454"; From e3d4efb2834b6aa5049a36b3e7ad4ca9458140d2 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 11:55:21 -0400 Subject: [PATCH 41/68] Remove N2 EXACT model code, which should never be used --- .../genotyper/ExactAFCalculationModel.java | 251 +----------------- .../genotyper/UnifiedArgumentCollection.java | 5 - 2 files changed, 9 insertions(+), 247 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 6ae437b27..8a3a97823 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -48,27 +48,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // code for testing purposes // private final static boolean DEBUG = false; - private final static boolean PRINT_LIKELIHOODS = false; - private final static int N_CYCLES = 1; - private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded"); - private SimpleTimer timerGS = new SimpleTimer("linearExactGS"); - private final static boolean COMPARE_TO_GS = false; - - public enum ExactCalculation { - N2_GOLD_STANDARD, - LINEAR_EXPERIMENTAL - } - private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 - - private boolean SIMPLE_GREEDY_GENOTYPER = false; - + private final boolean SIMPLE_GREEDY_GENOTYPER = false; private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. - final private ExactCalculation calcToUse; protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { super(UAC, N, logger, verboseWriter); - calcToUse = UAC.EXACT_CALCULATION_TYPE; } public void getLog10PNonRef(RefMetaDataTracker tracker, @@ -76,43 +61,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { Map GLs, Setalleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - double[] gsPosteriors; - if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here - gsPosteriors = log10AlleleFrequencyPosteriors.clone(); - - int idxAA = GenotypeType.AA.ordinal(); - int idxAB = GenotypeType.AB.ordinal(); - int idxBB = GenotypeType.BB.ordinal(); - - // todo -- remove me after testing - if ( N_CYCLES > 1 ) { - for ( int i = 0; i < N_CYCLES; i++) { - timerGS.restart(); - linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB); - timerGS.stop(); - - timerExpt.restart(); - linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone()); - timerExpt.stop(); - } - - System.out.printf("good = %.2f, expt = %.2f, delta = %.2f%n", - timerGS.getElapsedTime(), timerExpt.getElapsedTime(), timerExpt.getElapsedTime()-timerGS.getElapsedTime()); - } - - int lastK = -1; - - int numAlleles = alleles.size(); + final int numAlleles = alleles.size(); + final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null; + final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null; int idxDiag = numAlleles; int incr = numAlleles - 1; - - double[][] posteriorCache = new double[numAlleles-1][]; - double[] bestAFguess = new double[numAlleles-1]; - for (int k=1; k < numAlleles; k++) { // multi-allelic approximation, part 1: Ideally // for each alt allele compute marginal (suboptimal) posteriors - @@ -121,24 +75,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC. // 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD - idxAA = 0; - idxAB = k; + final int idxAA = 0; + final int idxAB = k; // yy is always element on the diagonal. // 2 alleles: BBelement 2 // 3 alleles: BB element 3. CC element 5 // 4 alleles: - idxBB = idxDiag; + final int idxBB = idxDiag; idxDiag += incr--; - // todo - possible cleanup - switch ( calcToUse ) { - case N2_GOLD_STANDARD: - lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); - break; - case LINEAR_EXPERIMENTAL: - lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); - break; - } + final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); + if (numAlleles > 2) { posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone(); bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors); @@ -153,39 +100,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]); } - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - if ( COMPARE_TO_GS ) { - gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB); - - double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]); - double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]); - boolean eq = (log10thisPVar == Double.NEGATIVE_INFINITY && log10gsPVar == Double.NEGATIVE_INFINITY) || MathUtils.compareDoubles(log10thisPVar, log10gsPVar, 1e-4) == 0; - - if ( ! eq || PRINT_LIKELIHOODS ) { - System.out.printf("----------------------------------------%n"); - for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) { - double x = log10AlleleFrequencyPosteriors[k]; - System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k, - x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k], - log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]); - } - System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n", - ref.getLocus().getStart(), lastK, log10thisPVar, log10gsPVar, log10thisPVar - log10gsPVar); - } - } - } private static final ArrayList getGLs(Map GLs) { ArrayList genotypeLikelihoods = new ArrayList(); - //int j = 0; genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy for ( Genotype sample : GLs.values() ) { if ( sample.hasLikelihoods() ) { - //double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods()); double[] gls = sample.getLikelihoods().getAsVector(); if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL) @@ -240,84 +162,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - // now with banding - public int linearExactBanded(Map GLs, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors) { - throw new NotImplementedException(); -// final int numSamples = GLs.size(); -// final int numChr = 2*numSamples; -// final double[][] genotypeLikelihoods = getGLs(GLs); -// -// final ExactACCache logY = new ExactACCache(numSamples+1); -// logY.getkMinus0()[0] = 0.0; // the zero case -// -// double maxLog10L = Double.NEGATIVE_INFINITY; -// boolean done = false; -// int lastK = -1; -// final int BAND_SIZE = 10; -// -// for (int k=0; k <= numChr && ! done; k++ ) { -// final double[] kMinus0 = logY.getkMinus0(); -// int jStart = Math.max(k - BAND_SIZE, 1); -// int jStop = Math.min(k + BAND_SIZE, numSamples); -// -// if ( k == 0 ) { // special case for k = 0 -// for ( int j=1; j <= numSamples; j++ ) { -// kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()]; -// } -// } else { // k > 0 -// final double[] kMinus1 = logY.getkMinus1(); -// final double[] kMinus2 = logY.getkMinus2(); -// Arrays.fill(kMinus0,0); -// -// for ( int j = jStart; j <= jStop; j++ ) { -// final double[] gl = genotypeLikelihoods[j]; -// final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1]; -// -// double aa = Double.NEGATIVE_INFINITY; -// double ab = Double.NEGATIVE_INFINITY; -// if (k < 2*j-1) -// aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()]; -// -// if (k < 2*j) -// ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()]; -// -// double log10Max; -// if (k > 1) { -// final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()]; -// log10Max = approximateLog10SumLog10(aa, ab, bb); -// } else { -// // we know we aren't considering the BB case, so we can use an optimized log10 function -// log10Max = approximateLog10SumLog10(aa, ab); -// } -// -// // finally, update the L(j,k) value -// kMinus0[j] = log10Max - logDenominator; -// -// String offset = Utils.dupString(' ',k); -// System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]); -// } -// } -// -// // update the posteriors vector -// final double log10LofK = kMinus0[jStop]; -// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k]; -// -// // can we abort early? -// lastK = k; -// maxLog10L = Math.max(maxLog10L, log10LofK); -// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { -// if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L); -// done = true; -// } -// -// logY.rotate(); -// } -// -// return lastK; - } - public int linearExact(Map GLs, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { @@ -605,82 +449,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return calls; } - // ------------------------------------------------------------------------------------- - // - // Gold standard, but O(N^2), implementation. - // - // TODO -- remove me for clarity in this code - // - // ------------------------------------------------------------------------------------- - public int gdaN2GoldStandard(Map GLs, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { - int numSamples = GLs.size(); - int numChr = 2*numSamples; - - double[][] logYMatrix = new double[1+numSamples][1+numChr]; - - for (int i=0; i <=numSamples; i++) - for (int j=0; j <=numChr; j++) - logYMatrix[i][j] = Double.NEGATIVE_INFINITY; - - //YMatrix[0][0] = 1.0; - logYMatrix[0][0] = 0.0; - int j=0; - - for ( Map.Entry sample : GLs.entrySet() ) { - j++; - - if ( !sample.getValue().hasLikelihoods() ) - continue; - - //double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods()); - double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector(); - //double logDenominator = Math.log10(2.0*j*(2.0*j-1)); - double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; - - // special treatment for k=0: iteration reduces to: - //YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()]; - logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA]; - - for (int k=1; k <= 2*j; k++ ) { - - //double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()]; - double logNumerator[]; - logNumerator = new double[3]; - if (k < 2*j-1) - logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] + - genotypeLikelihoods[idxAA]; - else - logNumerator[0] = Double.NEGATIVE_INFINITY; - - - if (k < 2*j) - logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] + - genotypeLikelihoods[idxAB]; - else - logNumerator[1] = Double.NEGATIVE_INFINITY; - - if (k > 1) - logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] + - genotypeLikelihoods[idxBB]; - else - logNumerator[2] = Double.NEGATIVE_INFINITY; - - double logNum = MathUtils.softMax(logNumerator); - - //YMatrix[j][k] = num/den; - logYMatrix[j][k] = logNum - logDenominator; - } - - } - - for (int k=0; k <= numChr; k++) - log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k]; - - return numChr; - } - private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) { int j = logYMatrix.length - 1; System.out.printf("-----------------------------------%n"); @@ -689,5 +457,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior); } } - } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 7b8045581..3c8fd4451 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -168,10 +168,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false) public boolean GSA_PRODUCTION_ONLY = false; - @Hidden - @Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false) - public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL; - @Hidden @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false) public boolean IGNORE_SNP_ALLELES = false; @@ -191,7 +187,6 @@ public class UnifiedArgumentCollection { uac.GLmodel = GLmodel; uac.AFmodel = AFmodel; - uac.EXACT_CALCULATION_TYPE = EXACT_CALCULATION_TYPE; uac.heterozygosity = heterozygosity; uac.PCR_error = PCR_error; uac.GenotypingMode = GenotypingMode; From cc23b0b8a9b184eee6908a2fcbfdf63318be11b1 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Fri, 23 Sep 2011 14:52:31 -0400 Subject: [PATCH 45/68] Fix for recent change modelling unmapped shards: don't invoke optimization to combine mapped and unmapped shards. --- .../sting/gatk/datasources/reads/LowMemoryIntervalSharder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java index ba6321121..bf5f33dc3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java @@ -59,7 +59,7 @@ public class LowMemoryIntervalSharder implements Iterator { */ public FilePointer next() { FilePointer current = wrappedIterator.next(); - while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0) + while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0) current = current.combine(parser,wrappedIterator.next()); return current; } From e1cb5f6459a9e1c534acb583dd94f9412f35322e Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 23 Sep 2011 15:00:03 -0400 Subject: [PATCH 46/68] SnpEff annotator now assigns a functional class to each effect and distinguishes between actual effects and mere modifiers. -We now assign a functional class (nonsense, missense, silent, or none) to each SnpEff effect, and add a SNPEFF_FUNCTIONAL_CLASS annotation to the INFO field of the output VCF. -Effects are now prioritized according to both biological impact and functional class, instead of impact only. -Many of SnpEff's "low-impact" effects are now classified as "modifiers" with lower priority than every other effect. This includes such "effects" as DOWNSTREAM, UPSTREAM, INTRON, GENE, EXON, and others that really describe the location of the variant rather than its biological effect. This code will be short-lived (likely 1.2-only), as the next version of SnpEff will include most of these features directly. Checking this change into Stable+Unstable instead of Unstable because the current functional class stratification in VariantEval is basically broken and urgently needs to be fixed for production purposes. --- .../sting/gatk/walkers/annotator/SnpEff.java | 160 ++++++++++++------ .../stratifications/FunctionalClass.java | 19 ++- .../VariantAnnotatorIntegrationTest.java | 4 +- .../VariantEvalIntegrationTest.java | 2 +- 4 files changed, 126 insertions(+), 59 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 8f99c6118..973b3277d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -82,7 +82,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3), GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4), TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6), - EXON_ID_KEY ("SNPEFF_EXON_ID", 7); + EXON_ID_KEY ("SNPEFF_EXON_ID", 7), + FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", -1); // Actual text of the key private final String keyName; @@ -108,48 +109,73 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio // Possible SnpEff biological effects. All effect names found in the SnpEff input file // are validated against this list. public enum EffectType { - NONE, - CHROMOSOME, - INTERGENIC, - UPSTREAM, - UTR_5_PRIME, - UTR_5_DELETED, - START_GAINED, - SPLICE_SITE_ACCEPTOR, - SPLICE_SITE_DONOR, - START_LOST, - SYNONYMOUS_START, - NON_SYNONYMOUS_START, - CDS, - GENE, - TRANSCRIPT, - EXON, - EXON_DELETED, - NON_SYNONYMOUS_CODING, - SYNONYMOUS_CODING, - FRAME_SHIFT, - CODON_CHANGE, - CODON_INSERTION, - CODON_CHANGE_PLUS_CODON_INSERTION, - CODON_DELETION, - CODON_CHANGE_PLUS_CODON_DELETION, - STOP_GAINED, - SYNONYMOUS_STOP, - NON_SYNONYMOUS_STOP, - STOP_LOST, - INTRON, - UTR_3_PRIME, - UTR_3_DELETED, - DOWNSTREAM, - INTRON_CONSERVED, - INTERGENIC_CONSERVED, - REGULATION, - CUSTOM, - WITHIN_NON_CODING_GENE + // High-impact effects: + FRAME_SHIFT (EffectFunctionalClass.NONE, false), + STOP_GAINED (EffectFunctionalClass.NONSENSE, false), + START_LOST (EffectFunctionalClass.NONE, false), + SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false), + SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false), + EXON_DELETED (EffectFunctionalClass.NONE, false), + STOP_LOST (EffectFunctionalClass.NONE, false), + + // Moderate-impact effects: + NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false), + CODON_CHANGE (EffectFunctionalClass.NONE, false), + CODON_INSERTION (EffectFunctionalClass.NONE, false), + CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false), + CODON_DELETION (EffectFunctionalClass.NONE, false), + CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false), + UTR_5_DELETED (EffectFunctionalClass.NONE, false), + UTR_3_DELETED (EffectFunctionalClass.NONE, false), + + // Low-impact effects: + SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false), + SYNONYMOUS_START (EffectFunctionalClass.SILENT, false), + NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false), + SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false), + NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false), + START_GAINED (EffectFunctionalClass.NONE, false), + + // Modifiers: + NONE (EffectFunctionalClass.NONE, true), + CHROMOSOME (EffectFunctionalClass.NONE, true), + INTERGENIC (EffectFunctionalClass.NONE, true), + UPSTREAM (EffectFunctionalClass.NONE, true), + UTR_5_PRIME (EffectFunctionalClass.NONE, true), + CDS (EffectFunctionalClass.NONE, true), + GENE (EffectFunctionalClass.NONE, true), + TRANSCRIPT (EffectFunctionalClass.NONE, true), + EXON (EffectFunctionalClass.NONE, true), + INTRON (EffectFunctionalClass.NONE, true), + UTR_3_PRIME (EffectFunctionalClass.NONE, true), + DOWNSTREAM (EffectFunctionalClass.NONE, true), + INTRON_CONSERVED (EffectFunctionalClass.NONE, true), + INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true), + REGULATION (EffectFunctionalClass.NONE, true), + CUSTOM (EffectFunctionalClass.NONE, true), + WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true); + + private final EffectFunctionalClass functionalClass; + private final boolean isModifier; + + EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) { + this.functionalClass = functionalClass; + this.isModifier = isModifier; + } + + public EffectFunctionalClass getFunctionalClass() { + return functionalClass; + } + + public boolean isModifier() { + return isModifier; + } } - // SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. + // SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of + // classifying some of the LOW impact effects as MODIFIERs. public enum EffectImpact { + MODIFIER (0), LOW (1), MODERATE (2), HIGH (3); @@ -163,6 +189,10 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio public boolean isHigherImpactThan ( EffectImpact other ) { return this.severityRating > other.severityRating; } + + public boolean isSameImpactAs ( EffectImpact other ) { + return this.severityRating == other.severityRating; + } } // SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information. @@ -172,6 +202,24 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio UNKNOWN } + // We assign a functional class to each SnpEff effect. + public enum EffectFunctionalClass { + NONE (0), + SILENT (1), + MISSENSE (2), + NONSENSE (3); + + private final int priority; + + EffectFunctionalClass ( int priority ) { + this.priority = priority; + } + + public boolean isHigherPriorityThan ( EffectFunctionalClass other ) { + return this.priority > other.priority; + } + } + public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff // without providing a SnpEff rod via --snpEffFile): @@ -336,7 +384,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio InfoFieldKey.GENE_NAME_KEY.getKeyName(), InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), - InfoFieldKey.EXON_ID_KEY.getKeyName() + InfoFieldKey.EXON_ID_KEY.getKeyName(), + InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName() ); } @@ -349,7 +398,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"), new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"), new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"), - new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant") + new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"), + new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values())) ); } @@ -411,11 +461,16 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio return; } - try { - impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]); + if ( effect != null && effect.isModifier() ) { + impact = EffectImpact.MODIFIER; } - catch ( IllegalArgumentException e ) { - parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()])); + else { + try { + impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]); + } + catch ( IllegalArgumentException e ) { + parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()])); + } } codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()]; @@ -472,9 +527,17 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio } // Otherwise, both effects are either in or not in a coding gene, so we compare the impacts - // of the effects themselves: + // of the effects themselves. Effects with the same impact are tie-broken using the + // functional class of the effect: - return impact.isHigherImpactThan(other.impact); + if ( impact.isHigherImpactThan(other.impact) ) { + return true; + } + else if ( impact.isSameImpactAs(other.impact) ) { + return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass()); + } + + return false; } public Map getAnnotations() { @@ -488,6 +551,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype); addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID); addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID); + addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString()); return annotations; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index 88ffcaaeb..1dc047b5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -62,14 +62,17 @@ public class FunctionalClass extends VariantStratifier { annotationId++; } while (eval.hasAttribute(key)); - } else if ( eval.hasAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName() ) ) { - SnpEff.EffectType snpEffType = SnpEff.EffectType.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName()).toString()); - if ( snpEffType == SnpEff.EffectType.STOP_GAINED ) - type = FunctionalType.nonsense; - else if ( snpEffType == SnpEff.EffectType.NON_SYNONYMOUS_CODING ) - type = FunctionalType.missense; - else if ( snpEffType == SnpEff.EffectType.SYNONYMOUS_CODING ) - type = FunctionalType.silent; + } else if ( eval.hasAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()) ) { + try { + SnpEff.EffectFunctionalClass snpEffFunctionalClass = SnpEff.EffectFunctionalClass.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()).toString()); + if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.NONSENSE ) + type = FunctionalType.nonsense; + else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.MISSENSE ) + type = FunctionalType.missense; + else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.SILENT ) + type = FunctionalType.silent; + } + catch ( Exception e ) {} // don't error out if the type isn't supported } if ( type != null ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 2c06c6b7f..04bff8d41 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -132,9 +132,9 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " + validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation + - "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000", + "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429", 1, - Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e") + Arrays.asList("122321a85e448f21679f6ca15c5e22ad") ); executeTest("Testing SnpEff annotations", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index b90e6d0ff..9fe253ecb 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("f5f811ceb973d7fd6c1b2b734f1b2b12") + Arrays.asList("d9dcb352c53106f54fcc981f15d35a90") ); executeTest("testFunctionClassWithSnpeff", spec); } From 9ea40f2e416727564849b749c0746fb688f6f1be Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 23 Sep 2011 16:37:08 -0400 Subject: [PATCH 47/68] Deletions/Insertions in hard clip and bug fixes * Deletions now count as hard clipped bases in order to recover the original alignment start of a clipped read. * Insertions do not count as hard clipped bases for the same reason. * This created a bug in the previous cigar cleaning function. Fixed. --- .../sting/utils/clipreads/ClippingOp.java | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 47ce8165c..81d00d9d7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -398,7 +398,9 @@ public class ClippingOp { for (int i = 1; i <= 2; i++) { int shift = 0; + int totalHardClip = 0; boolean readHasStarted = false; + boolean addedHardClips = false; while(!cigarStack.empty()) { CigarElement cigarElement = cigarStack.pop(); @@ -408,14 +410,33 @@ public class ClippingOp { cigarElement.getOperator() != CigarOperator.DELETION && cigarElement.getOperator() != CigarOperator.HARD_CLIP) readHasStarted = true; + + else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP) + totalHardClip += cigarElement.getLength(); + else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION) shift += cigarElement.getLength(); - if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) { - if (i==1) + else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION) + totalHardClip += cigarElement.getLength(); + + if (readHasStarted) { + if (i==1) { + if (!addedHardClips) { + if (totalHardClip > 0) + inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP)); + addedHardClips = true; + } inverseCigarStack.push(cigarElement); - else + } + else { + if (!addedHardClips) { + if (totalHardClip > 0) + cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP)); + addedHardClips = true; + } cleanCigar.add(cigarElement); + } } } // first pass (i=1) is from end to start of the cigar elements @@ -434,7 +455,6 @@ public class ClippingOp { private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { int newShift = 0; int oldShift = 0; - int deletionShift = 0; for (CigarElement cigarElement : newCigar.getCigarElements()) { if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) @@ -449,34 +469,19 @@ public class ClippingOp { else break; } - - int basesClipped = 0; - for (CigarElement cigarElement : oldCigar.getCigarElements()) { - if (basesClipped > newShift) // are we beyond the clipped region? - break; - - else if (cigarElement.getOperator() == CigarOperator.DELETION) // if this is a deletion, we have to adjust the starting shift - deletionShift += cigarElement.getLength(); - - else - basesClipped += cigarElement.getLength(); - } - - return newShift - oldShift + deletionShift; + return newShift - oldShift; } private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) { - if (cigarElement.getOperator() == CigarOperator.INSERTION) { - int cigarElementLength = cigarElement.getLength(); - if (clippedLength >= cigarElementLength) - return -cigarElement.getLength(); - else - return -clippedLength; - } + // Insertions should be discounted from the total hard clip count + if (cigarElement.getOperator() == CigarOperator.INSERTION) + return -clippedLength; -// if (cigarElement.getOperator() == CigarOperator.DELETION) -// return cigarElement.getLength(); + // Deletions should be added to the total hard clip count + else if (cigarElement.getOperator() == CigarOperator.DELETION) + return cigarElement.getLength(); + // There is no shift if we are not clipping an indel return 0; } From b66841f17941fcd2639c081e8b3d4eeec94b9721 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 17:29:34 -0400 Subject: [PATCH 48/68] Static cache for binomial probability -- Very low level performance optimization --- .../genotyper/UnifiedGenotyperEngine.java | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 87dd37bf6..9e8cc07a6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -544,6 +544,21 @@ public class UnifiedGenotyperEngine { AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; } + private final static double[] binomialProbabilityDepthCache = new double[10000]; + static { + for ( int i = 1; i < binomialProbabilityDepthCache.length; i++ ) { + binomialProbabilityDepthCache[i] = MathUtils.binomialProbability(0, i, 0.5); + } + } + + private final double getRefBinomialProb(final int depth) { + if ( depth < binomialProbabilityDepthCache.length ) + return binomialProbabilityDepthCache[depth]; + else + return MathUtils.binomialProbability(0, depth, 0.5); + } + + private VariantCallContext estimateReferenceConfidence(VariantContext vc, Map contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) { if ( contexts == null ) return null; @@ -567,7 +582,7 @@ public class UnifiedGenotyperEngine { depth = context.getExtendedEventPileup().size(); } - P_of_ref *= 1.0 - (theta / 2.0) * MathUtils.binomialProbability(0, depth, 0.5); + P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth); } return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false); From fbe3c1e0b3175f7bf838bd93182a7bcbb4c67e9a Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 23 Sep 2011 19:00:19 -0400 Subject: [PATCH 50/68] Adding warning on HardClipping Hard Clipping is still under heavy development and should not be used by anyone less prepared than MacGyver. --- .../sting/utils/clipreads/ClippingRepresentation.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java index 0dbe55726..d574ba2f0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java @@ -21,6 +21,8 @@ public enum ClippingRepresentation { SOFTCLIP_BASES, /** + * WARNING: THIS OPTION IS STILL UNDER DEVELOPMENT AND IS NOT SUPPORTED. + * * Change the read's cigar string to hard clip (H, see sam-spec) away the bases. * Hard clipping, unlike soft clipping, actually removes bases from the read, * reducing the resulting file's size but introducing an irrevesible (i.e., From 8ceb93b8ace3ba9d2cb60800a457ebe77c00b32d Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Fri, 23 Sep 2011 21:03:22 -0400 Subject: [PATCH 51/68] Fixed an integration test which crashed on the out of date LSF DRMAA library when run against the obsolete LSF dotkit instead of .combined_LSF_SGE --- .../drmaa/v1_0/JnaSessionIntegrationTest.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java index 3dfd0550d..48f4c3777 100644 --- a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java @@ -34,6 +34,7 @@ import java.io.File; import java.util.*; public class JnaSessionIntegrationTest extends BaseTest { + private String implementation = null; private static final SessionFactory factory = new JnaSessionFactory(); @Test @@ -44,10 +45,23 @@ public class JnaSessionIntegrationTest extends BaseTest { System.out.println(String.format("DRMAA contact(s): %s", session.getContact())); System.out.println(String.format("DRM system(s): %s", session.getDrmSystem())); System.out.println(String.format("DRMAA implementation(s): %s", session.getDrmaaImplementation())); + this.implementation = session.getDrmaaImplementation(); } - @Test + @Test(dependsOnMethods = { "testDrmaa" }) public void testSubmitEcho() throws Exception { + if (implementation.contains("LSF")) { + System.err.println(" ***********************************************************"); + System.err.println(" *************************************************************"); + System.err.println(" **** ****"); + System.err.println(" **** Skipping JnaSessionIntegrationTest.testSubmitEcho() ****"); + System.err.println(" **** Are you using the dotkit .combined_LSF_SGE? ****"); + System.err.println(" **** ****"); + System.err.println(" *************************************************************"); + System.err.println(" ***********************************************************"); + return; + } + File outFile = createNetworkTempFile("JnaSessionIntegrationTest-", ".out"); Session session = factory.getSession(); session.init(null); From 0e74cc3c749b7dada37a4f26b6943666e6b4c743 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Fri, 23 Sep 2011 21:58:20 -0400 Subject: [PATCH 52/68] a) Treat SNP genotype likelihoods just as indels, in the sense that they're always normalized as PL's so one of them will always be zero. This creates minor numerical differences in Qual and annotations due to numerical approximations in AF computation. b) Intermediate CombineVariants fixes, not ready yet --- .../genotyper/SNPGenotypeLikelihoodsCalculationModel.java | 6 ++++++ .../sting/gatk/walkers/variantutils/CombineVariants.java | 2 +- .../sting/utils/variantcontext/VariantContextUtils.java | 4 +++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 6905ce4a4..3fed4542c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; @@ -122,6 +123,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC aList.add(refAllele); aList.add(altAllele); double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ; + double maxElement = MathUtils.max(dlike[AlleleFrequencyCalculationModel.GenotypeType.AA.ordinal()], + dlike[AlleleFrequencyCalculationModel.GenotypeType.AB.ordinal()],dlike[AlleleFrequencyCalculationModel.GenotypeType.BB.ordinal()]); + for (int i=0; i < dlike.length; i++) + dlike[i] -= maxElement; + GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), aList, dlike, getFilteredDepth(pileup))); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 3e3b29a7f..40b704570 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -274,11 +274,11 @@ public class CombineVariants extends RodWalker { else { mergedVCs = preMergedVCs; } - for ( VariantContext mergedVC : mergedVCs ) { // only operate at the start of events if ( mergedVC == null ) continue; + System.out.println(mergedVC.toString()); HashMap attributes = new HashMap(mergedVC.getAttributes()); // re-compute chromosome counts diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index ca9c71eba..806bd2013 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -594,7 +594,9 @@ public class VariantContextUtils { // if we have more alternate alleles in the merged VC than in one or more of the original VCs, we need to strip out the GL/PLs (because they are no longer accurate) for ( VariantContext vc : VCs ) { - if ( vc.alleles.size() != alleles.size() ) { + if (vc.alleles.size() == 1) + continue; + if ( vc.alleles.size() != alleles.size()) { genotypes = stripPLs(genotypes); break; } From c0bb0cb465c6d0761d524e4b139f182325b7814d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 24 Sep 2011 08:48:33 -0400 Subject: [PATCH 53/68] Make DiploidGenotype enum private to walkers.genotyper --- .../genotype => gatk/walkers/genotyper}/DiploidGenotype.java | 4 ++-- .../gatk/walkers/genotyper/DiploidIndelGenotypePriors.java | 1 - .../gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java | 1 - .../gatk/walkers/genotyper/DiploidSNPGenotypePriors.java | 1 - .../genotyper/SNPGenotypeLikelihoodsCalculationModel.java | 1 - .../walkers/genotyper}/DiploidGenotypeUnitTest.java | 3 ++- .../gatk/walkers/genotyper/GenotypeLikelihoodsUnitTest.java | 1 - 7 files changed, 4 insertions(+), 8 deletions(-) rename public/java/src/org/broadinstitute/sting/{utils/genotype => gatk/walkers/genotyper}/DiploidGenotype.java (98%) rename public/java/test/org/broadinstitute/sting/{utils/genotype => gatk/walkers/genotyper}/DiploidGenotypeUnitTest.java (95%) diff --git a/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java index 1c2cfe2e1..b5987963f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.genotype; +package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.BaseUtils; @@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.BaseUtils; * Time: 6:46:09 PM * To change this template use File | Settings | File Templates. */ -public enum DiploidGenotype { +enum DiploidGenotype { AA ('A', 'A'), AC ('A', 'C'), AG ('A', 'G'), diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java index 696a74de8..d8c911092 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index ec180f0cd..71eea2467 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.pileup.FragmentPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypePriors.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypePriors.java index b9ed17d3e..71854591f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypePriors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypePriors.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 6905ce4a4..30acd6896 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -33,7 +33,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/DiploidGenotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeUnitTest.java similarity index 95% rename from public/java/test/org/broadinstitute/sting/utils/genotype/DiploidGenotypeUnitTest.java rename to public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeUnitTest.java index e4f8b12e3..4e72b37a4 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/DiploidGenotypeUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeUnitTest.java @@ -1,5 +1,6 @@ -package org.broadinstitute.sting.utils.genotype; +package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsUnitTest.java index 9882ce869..425b969e2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsUnitTest.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.testng.Assert; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.BaseTest; import org.testng.annotations.Test; From f792353dcd59948aab4da10521b32adeb990487d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 24 Sep 2011 08:56:45 -0400 Subject: [PATCH 54/68] Framework for genotype unit test --- .../sting/utils/variantcontext/Genotype.java | 3 +- .../variantcontext/GenotypeUnitTest.java | 84 +++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index fd8c70abe..7ab3f81f0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -268,7 +268,8 @@ public class Genotype { * @param the value type * @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys */ - public static , V> String sortedString(Map c) { + private static , V> String sortedString(Map c) { + // NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS List t = new ArrayList(c.keySet()); Collections.sort(t); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeUnitTest.java new file mode 100644 index 000000000..c4f1efd04 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeUnitTest.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.utils.variantcontext; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.List; + + +public class GenotypeUnitTest extends BaseTest { + Allele A, Aref, T; + + @BeforeSuite + public void before() { + A = Allele.create("A"); + Aref = Allele.create("A", true); + T = Allele.create("T"); + } + +// public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased) { +// public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased, double[] log10Likelihoods) { +// public Genotype(String sampleName, List alleles, double negLog10PError, double[] log10Likelihoods) +// public Genotype(String sampleName, List alleles, double negLog10PError) +// public Genotype(String sampleName, List alleles) +// public List getAlleles() +// public List getAlleles(Allele allele) +// public Allele getAllele(int i) +// public boolean isPhased() +// public int getPloidy() +// public Type getType() +// public boolean isHom() +// public boolean isHomRef() +// public boolean isHomVar() +// public boolean isHet() +// public boolean isNoCall() +// public boolean isCalled() +// public boolean isAvailable() +// public boolean hasLikelihoods() +// public GenotypeLikelihoods getLikelihoods() +// public boolean sameGenotype(Genotype other) +// public boolean sameGenotype(Genotype other, boolean ignorePhase) +// public String getSampleName() +// public boolean hasNegLog10PError() +// public double getNegLog10PError() +// public double getPhredScaledQual() +// public boolean hasAttribute(String key) +// public Object getAttribute(String key) +// public Object getAttribute(String key, Object defaultValue) +// public String getAttributeAsString(String key, String defaultValue) +// public int getAttributeAsInt(String key, int defaultValue) +// public double getAttributeAsDouble(String key, double defaultValue) +// public boolean getAttributeAsBoolean(String key, boolean defaultValue) +} From 92acff46e5e99a8875eafa626c45a83b3751d01b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 24 Sep 2011 09:14:05 -0400 Subject: [PATCH 55/68] Moved Haplotype into Utils root --- .../sting/gatk/walkers/annotator/HaplotypeScore.java | 2 +- .../IndelGenotypeLikelihoodsCalculationModel.java | 7 +++---- .../gatk/walkers/indels/HaplotypeIndelErrorModel.java | 2 +- .../sting/gatk/walkers/indels/PairHMMIndelErrorModel.java | 2 +- .../sting/utils/{genotype => }/Haplotype.java | 3 +-- 5 files changed, 7 insertions(+), 9 deletions(-) rename public/java/src/org/broadinstitute/sting/utils/{genotype => }/Haplotype.java (98%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index df6da3b85..c142109fa 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -34,12 +34,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index ec5eefd60..11cde5ffe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -34,10 +34,9 @@ import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; @@ -396,8 +395,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength, (int)ref.getWindow().size(), loc.getStart(), numPrefBases); //System.out.println(eventLength); - haplotypeMap = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(), - ref, hsize, numPrefBases); + haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(), + ref, hsize, numPrefBases); // For each sample, get genotype likelihoods based on pileup // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 232e468f9..3b3f54b05 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -26,9 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 2d7969230..4f9bccc42 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -29,8 +29,8 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java rename to public/java/src/org/broadinstitute/sting/utils/Haplotype.java index a17e81461..ce2ca2c28 100755 --- a/public/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -22,10 +22,9 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.genotype; +package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.Allele; From 6804ab6d2f23afeb573e12f1238ebfc17c32fe33 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 24 Sep 2011 09:25:29 -0400 Subject: [PATCH 56/68] Bug fix for NPE in very short GATK runs -- Was already in unstable, but not stable... --- .../broadinstitute/sting/gatk/traversals/TraversalEngine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 27fd173cb..c6321e2ad 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -358,7 +358,7 @@ public abstract class TraversalEngine,Provide public void printOnTraversalDone() { printProgress(null, null, true); - final double elapsed = timer.getElapsedTime(); + final double elapsed = timer == null ? 0 : timer.getElapsedTime(); ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics(); From cd058dd10f337ab83919eaaba1f525e1f619b9f7 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Sat, 24 Sep 2011 13:40:11 -0400 Subject: [PATCH 57/68] a) Fixed md5 for legit change in UG output that now also no-calls genotypes w/0,0,0 in PL's in SNP case. b) First reimplementation of new vc merger of different types. Previous version did it in two steps, first merging all vc's per type and then trying to see if resulting vc's would be merged if alleles of one type were a subset of another, but this won't work when uniquifying genotypes since sample names would be messed up and GT sample names wouldn't match VC sample names. Now, it's actually simpler: when splitting vc's by type before merging, we check for alleles of one vc being a subset of alleles of vc of another type and if so we put them together in same list. --- .../walkers/variantutils/CombineVariants.java | 36 ++-------------- .../variantcontext/VariantContextUtils.java | 42 +++++++++++++++++-- .../UnifiedGenotyperIntegrationTest.java | 2 +- 3 files changed, 43 insertions(+), 37 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 40b704570..707f940fe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -234,47 +234,17 @@ public class CombineVariants extends RodWalker { if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN)) return 0; - List preMergedVCs = new ArrayList(); + List mergedVCs = new ArrayList(); Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); // iterate over the types so that it's deterministic for ( VariantContext.Type type : VariantContext.Type.values() ) { if ( VCsByType.containsKey(type) ) - preMergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), + mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); } - List mergedVCs = new ArrayList(); - // se have records merged but separated by type. If a particular record is for example a snp but all alleles are a subset of an existing mixed record, - // we will still merge those records. - if (preMergedVCs.size() > 1) { - for (VariantContext vc1 : preMergedVCs) { - VariantContext newvc = vc1; - boolean merged = false; - for (int k=0; k < mergedVCs.size(); k++) { - VariantContext vc2 = mergedVCs.get(k); - - if (VariantContextUtils.allelesAreSubset(vc1,vc2) || VariantContextUtils.allelesAreSubset(vc2,vc1)) { - // all alleles of vc1 are contained in vc2 but they are of different type (say, vc1 is snp, vc2 is complex): try to merget v1 into v2 - List vcpair = new ArrayList(); - vcpair.add(vc1); - vcpair.add(vc2); - newvc = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcpair, - priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, - SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC); - mergedVCs.set(k,newvc); - merged = true; - break; - } - } - if (!merged) - mergedVCs.add(vc1); - } - } - else { - mergedVCs = preMergedVCs; - } - for ( VariantContext mergedVC : mergedVCs ) { + for ( VariantContext mergedVC : mergedVCs ) { // only operate at the start of events if ( mergedVC == null ) continue; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 0ca067cec..f4f03e8f0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -758,9 +758,45 @@ public class VariantContextUtils { public static Map> separateVariantContextsByType(Collection VCs) { HashMap> mappedVCs = new HashMap>(); for ( VariantContext vc : VCs ) { - if ( !mappedVCs.containsKey(vc.getType()) ) - mappedVCs.put(vc.getType(), new ArrayList()); - mappedVCs.get(vc.getType()).add(vc); + + // look at previous variant contexts of different type. If: + // a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list + // b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list) + // c) neither: do nothing, just add vc to its own list + boolean addtoOwnList = true; + for (VariantContext.Type type : VariantContext.Type.values()) { + if (type.equals(vc.getType())) + continue; + + if (!mappedVCs.containsKey(type)) + continue; + + List vcList = mappedVCs.get(type); + for (int k=0; k < vcList.size(); k++) { + VariantContext otherVC = vcList.get(k); + if (allelesAreSubset(otherVC,vc)) { + // otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list + vcList.remove(k); + // avoid having empty lists + if (vcList.size() == 0) + mappedVCs.remove(vcList); + if ( !mappedVCs.containsKey(vc.getType()) ) + mappedVCs.put(vc.getType(), new ArrayList()); + mappedVCs.get(vc.getType()).add(otherVC); + break; + } + else if (allelesAreSubset(vc,otherVC)) { + // vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own + mappedVCs.get(type).add(vc); + addtoOwnList = false; + } + } + } + if (addtoOwnList) { + if ( !mappedVCs.containsKey(vc.getType()) ) + mappedVCs.put(vc.getType(), new ArrayList()); + mappedVCs.get(vc.getType()).add(vc); + } } return mappedVCs; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 41496bdf1..488b3ccd9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -42,7 +42,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("ec43daadfb15b00b41aeb0017a45df0b")); + Arrays.asList("6458f3b8fe4954e2ffc2af972aaab19e")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } From c31f4cb2f6cee438719a62a6216603a591ca0829 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sat, 24 Sep 2011 14:33:32 -0400 Subject: [PATCH 58/68] Cleaning leading insertions With the current implementation, a read cannot start with a deletion or an insertion. Maybe this will change in the future, but for now, chop the leading insertion off. --- .../sting/utils/clipreads/ReadClipper.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index d0e7f8371..3b83617cf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.clipreads; import com.google.java.contract.Requires; -import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; @@ -9,7 +8,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; /** @@ -185,4 +183,21 @@ public class ReadClipper { } } } + + public SAMRecord hardClipLeadingInsertions() { + for(CigarElement cigarElement : read.getCigar().getCigarElements()) { + if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP && + cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION) + break; + + else if (cigarElement.getOperator() == CigarOperator.INSERTION) { + this.addOp(new ClippingOp(0, cigarElement.getLength() - 1)); + } + + else if (cigarElement.getOperator() == CigarOperator.DELETION) { + throw new ReviewedStingException("No read should start with a deletion. Aligner bug?"); + } + } + return clipRead(ClippingRepresentation.HARDCLIP_BASES); + } } From 203517fbb7c69f087c67b529e2bff8874c584e2d Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Sat, 24 Sep 2011 19:08:00 -0400 Subject: [PATCH 60/68] a) Cleanups/bug fixes to previous commit to CombineVariants. b) Change md5 to reflect records that are now merged correctly. c) Change unit merge alleles test to reflect the fact that a null non-variant vc object is not valid and not supported because there's no way to codify such object in a vcf. The code correctly converts this to a non-variant single-base event with whatever the reference is at that location. --- .../walkers/variantutils/CombineVariants.java | 3 +-- .../utils/variantcontext/VariantContext.java | 2 +- .../variantcontext/VariantContextUtils.java | 1 + .../CombineVariantsIntegrationTest.java | 2 +- .../VariantContextUtilsUnitTest.java | 17 ++++++++++++++--- 5 files changed, 18 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 707f940fe..ce03dfffe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -233,7 +233,7 @@ public class CombineVariants extends RodWalker { if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN)) return 0; - + List mergedVCs = new ArrayList(); Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); // iterate over the types so that it's deterministic @@ -248,7 +248,6 @@ public class CombineVariants extends RodWalker { // only operate at the start of events if ( mergedVC == null ) continue; - System.out.println(mergedVC.toString()); HashMap attributes = new HashMap(mergedVC.getAttributes()); // re-compute chromosome counts diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index c1623d2cb..412cbd90b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1495,7 +1495,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati // Do not change the filter state if filters were not applied to this context Set inputVCFilters = inputVC.filtersWereAppliedToContext ? inputVC.getFilters() : null; - return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes()); + return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes(),refByte); } else return inputVC; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index f4f03e8f0..ae648d547 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -789,6 +789,7 @@ public class VariantContextUtils { // vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own mappedVCs.get(type).add(vc); addtoOwnList = false; + break; } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 0205fe0eb..3b1a97369 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("212d9d3df10bb29e2c7fb226da422dc0")); + Arrays.asList("b14f8cbb5d03a2e613b12da4da9efd9a")); executeTest("threeWayWithRefs", spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 15a5549b2..498f7c12c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -38,7 +38,7 @@ import java.io.FileNotFoundException; import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { - Allele Aref, T, C, delRef, ATC, ATCATC; + Allele Aref, T, C, delRef, Cref, ATC, ATCATC; private GenomeLocParser genomeLocParser; @BeforeSuite @@ -54,6 +54,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { // alleles Aref = Allele.create("A", true); + Cref = Allele.create("C", true); delRef = Allele.create("-", true); T = Allele.create("T"); C = Allele.create("C"); @@ -94,7 +95,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { int stop = start; // alleles.contains(ATC) ? start + 3 : start; return new VariantContext(source, "1", start, stop, alleles, genotypes == null ? null : VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), - 1.0, filters, null, (byte)'C'); + 1.0, filters, null, Cref.getBases()[0]); } // -------------------------------------------------------------------------------- @@ -148,8 +149,10 @@ public class VariantContextUtilsUnitTest extends BaseTest { Arrays.asList(Aref, C), Arrays.asList(Aref, C, T)); // sorted by allele + // The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant. + // The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference. new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef)); // todo -- FIXME me GdA + Arrays.asList(Cref)); new MergeAllelesTest(Arrays.asList(delRef), Arrays.asList(delRef, ATC), @@ -186,6 +189,14 @@ public class VariantContextUtilsUnitTest extends BaseTest { inputs, priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false); + System.out.println("expected:"); + System.out.println(cfg.expected.toString()); + System.out.println("inputs"); + for (VariantContext vc:inputs) + System.out.println(vc.toString()); + System.out.println("merged:"); + System.out.println(merged.toString()); + Assert.assertEquals(merged.getAlleles(), cfg.expected); } From 4707ab4a7d00168bb0ae4145bf4ebde0c5263cff Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Sat, 24 Sep 2011 21:17:15 -0400 Subject: [PATCH 61/68] Added unit tests to test genotype merges with PL's --- .../VariantContextUtilsUnitTest.java | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 498f7c12c..08db7bcde 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -66,6 +66,11 @@ public class VariantContextUtilsUnitTest extends BaseTest { return new Genotype(sample, Arrays.asList(a1, a2)); } + private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError, double l1, double l2, double l3) { + return new Genotype(sample, Arrays.asList(a1, a2), log10pError, new double[]{l1,l2,l3}); + } + + private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) { return new Genotype(sample, Arrays.asList(a1, a2), log10pError); } @@ -189,13 +194,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { inputs, priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false); - System.out.println("expected:"); - System.out.println(cfg.expected.toString()); - System.out.println("inputs"); - for (VariantContext vc:inputs) - System.out.println(vc.toString()); - System.out.println("merged:"); - System.out.println(merged.toString()); Assert.assertEquals(merged.getAlleles(), cfg.expected); } @@ -448,7 +446,17 @@ public class VariantContextUtilsUnitTest extends BaseTest { makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)), makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3))); - // todo -- GDA -- add tests for merging correct PLs + // merging genothpes with PLs + new MergeGenotypesTest("TakeGenotypePartialOverlapWithPLs-2,1", "2,1", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1,5,0,3)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2,4,0,2), makeG("s3", Aref, T, 3,3,0,2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2,4,0,2), makeG("s3", Aref, T, 3,3,0,2))); + + new MergeGenotypesTest("TakeGenotypePartialOverlapWithPLs-1,2", "1,2", + makeVC("1", Arrays.asList(Aref,ATC), makeG("s1", Aref, ATC, 1,5,0,3)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2,4,0,2), makeG("s3", Aref, T, 3,3,0,2)), + // no likelihoods on result since type changes to mixed multiallelic + makeVC("3", Arrays.asList(Aref, ATC, T), makeG("s1", Aref, ATC, 1), makeG("s3", Aref, T, 3))); return MergeGenotypesTest.getTests(MergeGenotypesTest.class); } @@ -489,7 +497,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError()); Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods()); if ( value.hasLikelihoods() ) - Assert.assertEquals(value.getLikelihoods(), expectedValue.getLikelihoods()); + Assert.assertEquals(value.getLikelihoods().getAsVector(), expectedValue.getLikelihoods().getAsVector()); } } From 9afccd11b1b52bbf37c639e7944f7d1b12ba0a20 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Sun, 25 Sep 2011 21:18:56 -0400 Subject: [PATCH 64/68] Minor refactoring: add ability to MathUtils.normalizeFromLog10 to not go to linear domain but just substract max value from log values and return. Use this function in snp and indel GL computation. --- .../SNPGenotypeLikelihoodsCalculationModel.java | 7 ++----- .../walkers/indels/PairHMMIndelErrorModel.java | 10 ++-------- .../broadinstitute/sting/utils/MathUtils.java | 16 +++++++++++++++- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 742d4aadf..9bdc754e9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -122,13 +122,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC aList.add(refAllele); aList.add(altAllele); double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ; - double maxElement = MathUtils.max(dlike[AlleleFrequencyCalculationModel.GenotypeType.AA.ordinal()], - dlike[AlleleFrequencyCalculationModel.GenotypeType.AB.ordinal()],dlike[AlleleFrequencyCalculationModel.GenotypeType.BB.ordinal()]); - for (int i=0; i < dlike.length; i++) - dlike[i] -= maxElement; + // normalize in log space so that max element is zero. GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), - aList, dlike, getFilteredDepth(pileup))); + aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup))); } return refAllele; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 4f9bccc42..31e9819ab 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -1041,20 +1041,14 @@ public class PairHMMIndelErrorModel { double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2]; int k=0; - double maxElement = Double.NEGATIVE_INFINITY; for (int j=0; j < hSize; j++) { for (int i=0; i <= j; i++){ genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j]; - if (haplotypeLikehoodMatrix[i][j] > maxElement) - maxElement = haplotypeLikehoodMatrix[i][j]; } } - // renormalize - for (int i=0; i < genotypeLikelihoods.length; i++) - genotypeLikelihoods[i] -= maxElement; - - return genotypeLikelihoods; + // renormalize so that max element is zero. + return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 0d85f9606..17e74c4f1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -444,11 +444,25 @@ public class MathUtils { * @return a newly allocated array corresponding the normalized values in array, maybe log10 transformed */ public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput) { - double[] normalized = new double[array.length]; + return normalizeFromLog10(array, takeLog10OfOutput, false); + } + + public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput, boolean keepInLogSpace) { // for precision purposes, we need to add (or really subtract, since they're // all negative) the largest value; also, we need to convert to normal-space. double maxValue = Utils.findMaxEntry(array); + + // we may decide to just normalize in log space with converting to linear space + if (keepInLogSpace) { + for (int i = 0; i < array.length; i++) + array[i] -= maxValue; + return array; + } + + // default case: go to linear space + double[] normalized = new double[array.length]; + for (int i = 0; i < array.length; i++) normalized[i] = Math.pow(10, array[i] - maxValue); From b76dbc72f0d856866689e8b150b0acc39b4ece59 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 26 Sep 2011 08:13:44 -0400 Subject: [PATCH 65/68] Fixed interval navigation bug. If a read was hard clipped away from the current interval, all subsequent reads within that interval (not hardclipped) would be filtered out. Fixed. --- .../sting/utils/sam/ReadUtils.java | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index c328bbc5a..49d79a72c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -157,7 +157,7 @@ public class ReadUtils { * |----------------| (interval) * <--------> (read) */ - public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED} + public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED} /** * God, there's a huge information asymmetry in SAM format: @@ -640,27 +640,35 @@ public class ReadUtils { */ public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) { - int start = getRefCoordSoftUnclippedStart(read); - int stop = getRefCoordSoftUnclippedEnd(read); + int sStart = getRefCoordSoftUnclippedStart(read); + int sStop = getRefCoordSoftUnclippedEnd(read); + int uStart = read.getUnclippedStart(); + int uStop = read.getUnclippedEnd(); if ( !read.getReferenceName().equals(interval.getContig()) ) return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG; - else if ( stop < interval.getStart() ) + else if ( uStop < interval.getStart() ) return ReadAndIntervalOverlap.NO_OVERLAP_LEFT; - else if ( start > interval.getStop() ) + else if ( uStart > interval.getStop() ) return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT; - else if ( (start >= interval.getStart()) && - (stop <= interval.getStop()) ) + else if ( sStop < interval.getStart() ) + return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT; + + else if ( sStart > interval.getStop() ) + return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT; + + else if ( (sStart >= interval.getStart()) && + (sStop <= interval.getStop()) ) return ReadAndIntervalOverlap.OVERLAP_CONTAINED; - else if ( (start < interval.getStart()) && - (stop > interval.getStop()) ) + else if ( (sStart < interval.getStart()) && + (sStop > interval.getStop()) ) return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT; - else if ( (start < interval.getStart()) ) + else if ( (sStart < interval.getStart()) ) return ReadAndIntervalOverlap.OVERLAP_LEFT; else From 317b95fa5738ee0434aa8e122408abe88c0b9242 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 26 Sep 2011 11:46:45 -0500 Subject: [PATCH 67/68] Fixing some annotator docs --- .../gatk/walkers/annotator/DepthOfCoverage.java | 15 +++++++++++++-- .../walkers/annotator/DepthPerAlleleBySample.java | 8 ++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index 8826de232..864be55b7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -19,8 +19,19 @@ import java.util.Map; /** * Total (unfiltered) depth over all samples. * - * Affected by downsampling (-dcov) though, so the max value one can obtain for N samples with -dcov D - * is N * D + * This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample + * at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal + * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site. + * The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the + * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the + * power I have to determine the genotype of the sample at this site, while the AD tells me how many times + * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering + * the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like + * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would + * normally be excluded from the statistical calculations going into GQ and QUAL. + * + * Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with + * -dcov D is N * D */ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 1cd30c51d..5d706d9c5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -24,9 +24,9 @@ import java.util.Map; /** - * The depth of coverage of each VCF allele in this sample + * The depth of coverage of each VCF allele in this sample. * - * Complementary fields that two important ways of thinking about the depth of the data for this sample + * This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample * at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site. * The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the @@ -38,8 +38,8 @@ import java.util.Map; * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that * the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that * are actually present and correctly left-aligned in the alignments themselves). Because of this fact and - * because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base - * assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what + * because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base + * assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what * determine the genotype calls (see below). */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { From 059cdcb1bee9c8fa8aac4bff79706d514082097a Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Mon, 26 Sep 2011 14:58:19 -0400 Subject: [PATCH 68/68] Changing packaging system path for GATK-only Tribble codecs. --- public/packages/GATKEngine.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/packages/GATKEngine.xml b/public/packages/GATKEngine.xml index 4f635f7fb..4364988e7 100644 --- a/public/packages/GATKEngine.xml +++ b/public/packages/GATKEngine.xml @@ -29,7 +29,7 @@ - +