diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index 7ea515591..1ef452a5c 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram { private String RECAL_FILE = "output.recal_data.csv"; @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false) private String OUTPUT_DIR = "analyzeCovariates/"; - @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) + @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. 
For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false) private String PATH_TO_RSCRIPT = "Rscript"; @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false) private String PATH_TO_RESOURCES = "public/R/"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index f6a1c4f31..3be87da80 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) protected boolean indelsOnly = false; + @Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation") + public String familyStr = null; + + @Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality threshold in order to annotate mendelian violation ratio") + public double minGenotypeQualityP = 0.0; + private VariantAnnotatorEngine engine; private Collection indelBufferContext; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 529d17285..89e702b64 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -155,7 +155,7 @@ public class VariantRecalibrator extends RodWalker { throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); } } else - mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD)); + mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); } else if (!FAMILY_STRUCTURE.isEmpty()) { mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index c6a07b5ce..8da118174 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.utils; +import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.regex.Matcher; @@ -32,6 +34,9 @@ public class MendelianViolation { private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); + static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 }; + static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 }; + public String getSampleMom() { return sampleMom; @@ -168,4 +173,41 @@ public class MendelianViolation { return true; } + /** + * @return the log10-scale likelihood ratio of a mendelian violation versus a non-violation (log10 sum of MV likelihoods minus log10 sum of non-MV likelihoods) + */ + public double 
violationLikelihoodRatio(VariantContext vc) { + double[] logLikAssignments = new double[27]; + // the matrix to set up is + // MOM DAD CHILD + // |- AA + // AA AA | AB + // |- BB + // |- AA + // AA AB | AB + // |- BB + // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs + double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector(); + double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector(); + double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector(); + int offset = 0; + for ( int oMom = 0; oMom < 3; oMom++ ) { + for ( int oDad = 0; oDad < 3; oDad++ ) { + for ( int oChild = 0; oChild < 3; oChild ++ ) { + logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild]; + } + } + } + double[] mvLiks = new double[12]; + double[] nonMVLiks = new double[15]; + for ( int i = 0; i < 12; i ++ ) { + mvLiks[i] = logLikAssignments[mvOffsets[i]]; + } + + for ( int i = 0; i < 15; i++) { + nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]]; + } + + return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java index c0493fe22..58f7942fe 100644 --- a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java @@ -53,7 +53,7 @@ public class RScriptExecutor { public static class RScriptArgumentCollection { @Advanced - @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) + @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. 
For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false) public String PATH_TO_RSCRIPT = "Rscript"; @Advanced diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index f6edb319f..6ce492c63 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -240,22 +240,34 @@ public class Utils { return ret.toString(); } - //public static String join(String separator, Collection strings) { - // return join( separator, strings.toArray(new String[0]) ); - //} - - public static String join(String separator, Collection objects) { - if(objects.isEmpty()) { + /** + * Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of + * elti objects (note there's no actual space between sep and the elti elements). Returns + * "" if collection is empty. If collection contains just elt, then returns elt.toString() + * + * @param separator the string to use to separate objects + * @param objects a collection of objects. the element order is defined by the iterator over objects + * @param the type of the objects + * @return a non-null string + */ + public static String join(final String separator, final Collection objects) { + if (objects.isEmpty()) { // fast path for empty collection return ""; - } - Iterator iter = objects.iterator(); - final StringBuilder ret = new StringBuilder(iter.next().toString()); - while(iter.hasNext()) { - ret.append(separator); - ret.append(iter.next().toString()); - } + } else { + final Iterator iter = objects.iterator(); + final T first = iter.next(); - return ret.toString(); + if ( ! 
iter.hasNext() ) // fast path for singleton collections + return first.toString(); + else { // full path for 2+ collection that actually need a join + final StringBuilder ret = new StringBuilder(first.toString()); + while(iter.hasNext()) { + ret.append(separator); + ret.append(iter.next().toString()); + } + return ret.toString(); + } + } } public static String dupString(char c, int nCopies) { diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index ef7cf751e..4f096f86e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -5,6 +5,7 @@ import net.sf.picard.reference.ReferenceSequence; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -131,19 +132,18 @@ public class BAQ { private final static double EM = 0.33333333333; private final static double EI = 0.25; - private double[][][] EPSILONS = new double[256][256][64]; + private double[][][] EPSILONS = new double[256][256][SAMUtils.MAX_PHRED_SCORE+1]; private void initializeCachedData() { for ( int i = 0; i < 256; i++ ) for ( int j = 0; j < 256; j++ ) - for ( int q = 0; q < 64; q++ ) { - double qual = qual2prob[q < minBaseQual ? minBaseQual : q]; + for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) { EPSILONS[i][j][q] = 1.0; } for ( char b1 : "ACGTacgt".toCharArray() ) { for ( char b2 : "ACGTacgt".toCharArray() ) { - for ( int q = 0; q < 64; q++ ) { + for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) { double qual = qual2prob[q < minBaseQual ? minBaseQual : q]; double e = Character.toLowerCase(b1) == Character.toLowerCase(b2) ? 
1 - qual : qual * EM; EPSILONS[(byte)b1][(byte)b2][q] = e; @@ -152,7 +152,7 @@ public class BAQ { } } - private double calcEpsilon( byte ref, byte read, byte qualB ) { + protected double calcEpsilon( byte ref, byte read, byte qualB ) { return EPSILONS[ref][read][qualB]; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 624d06a71..83c7083d0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -6,6 +6,7 @@ import org.broad.tribble.FeatureCodec; import org.broad.tribble.NameAwareCodec; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; +import org.broad.tribble.util.BlockCompressedInputStream; import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -215,7 +216,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true); // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data) - if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) || + if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) || (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) ) throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? 
NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + " tokens, and saw " + nParts + " )", lineNo); @@ -345,6 +346,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, generateException("The VCF specification requires a valid info field"); if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) { + if ( infoField.indexOf("\t") != -1 || infoField.indexOf(" ") != -1 ) + generateException("The VCF specification does not allow for whitespace in the INFO field"); + int infoValueSplitSize = ParsingUtils.split(infoField, infoValueArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR); for (int i = 0; i < infoValueSplitSize; i++) { String key; @@ -587,7 +591,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) { try { return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) || - isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE); + isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) || + isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE); } catch ( FileNotFoundException e ) { return false; } catch ( IOException e ) { @@ -598,12 +603,17 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) { try { byte[] buff = new byte[MAGIC_HEADER_LINE.length()]; - stream.read(buff, 0, MAGIC_HEADER_LINE.length()); - String firstLine = new String(buff); - stream.close(); - return firstLine.startsWith(MAGIC_HEADER_LINE); + int nread = stream.read(buff, 0, MAGIC_HEADER_LINE.length()); + boolean eq = Arrays.equals(buff, MAGIC_HEADER_LINE.getBytes()); + return eq; +// String firstLine = new String(buff); +// return firstLine.startsWith(MAGIC_HEADER_LINE); } catch ( IOException e 
) { return false; + } catch ( RuntimeException e ) { + return false; + } finally { + try { stream.close(); } catch ( IOException e ) {} } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 343146d6d..ca9c71eba 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -316,19 +316,22 @@ public class VariantContextUtils { return pruneVariantContext(vc, null); } - public static VariantContext pruneVariantContext(VariantContext vc, Collection keysToPreserve ) { - MutableVariantContext mvc = new MutableVariantContext(vc); + public static VariantContext pruneVariantContext(final VariantContext vc, final Collection keysToPreserve ) { + final MutableVariantContext mvc = new MutableVariantContext(vc); if ( keysToPreserve == null || keysToPreserve.size() == 0 ) mvc.clearAttributes(); else { - Map d = mvc.getAttributes(); + final Map d = mvc.getAttributes(); mvc.clearAttributes(); for ( String key : keysToPreserve ) if ( d.containsKey(key) ) mvc.putAttribute(key, d.get(key)); } + // this must be done as the ID is stored in the attributes field + if ( vc.hasID() ) mvc.setID(vc.getID()); + Collection gs = mvc.getGenotypes().values(); mvc.clearGenotypes(); for ( Genotype g : gs ) { @@ -443,34 +446,6 @@ public class VariantContextUtils { throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next())); } - - public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, byte refBase) { - return simpleMerge(genomeLocParser, unsortedVCs, null, FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GenotypeMergeType.UNSORTED, false, false, refBase); - } - - - /** - * Merges VariantContexts into a single hybrid. 
Takes genotypes for common samples in priority order, if provided. - * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with - * the sample name - * - * @param genomeLocParser loc parser - * @param unsortedVCs collection of unsorted VCs - * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs - * @param filteredRecordMergeType merge type for filtered records - * @param genotypeMergeOptions merge option for genotypes - * @param annotateOrigin should we annotate the set it came from? - * @param printMessages should we print messages? - * @param inputRefBase the ref base - * @return new VariantContext - */ - public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, - FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, - boolean annotateOrigin, boolean printMessages, byte inputRefBase ) { - - return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, "set", false, false); - } - /** * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided. * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with @@ -486,12 +461,18 @@ public class VariantContextUtils { * @param setKey the key name of the set * @param filteredAreUncalled are filtered records uncalled? * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? 
- * @return new VariantContext + * @return new VariantContext representing the merge of unsortedVCs */ - public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, - FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, - boolean annotateOrigin, boolean printMessages, String setKey, - boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) { + public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser, + final Collection unsortedVCs, + final List priorityListOfVCs, + final FilteredRecordMergeType filteredRecordMergeType, + final GenotypeMergeType genotypeMergeOptions, + final boolean annotateOrigin, + final boolean printMessages, + final String setKey, + final boolean filteredAreUncalled, + final boolean mergeInfoWithMaxAC ) { if ( unsortedVCs == null || unsortedVCs.size() == 0 ) return null; @@ -514,26 +495,28 @@ public class VariantContextUtils { return null; // establish the baseline info from the first VC - VariantContext first = VCs.get(0); - String name = first.getSource(); - GenomeLoc loc = getLocation(genomeLocParser,first); + final VariantContext first = VCs.get(0); + final String name = first.getSource(); + final Allele refAllele = determineReferenceAllele(VCs); - Set alleles = new TreeSet(); - Map genotypes = new TreeMap(); - double negLog10PError = -1; - Set filters = new TreeSet(); - Map attributes = new TreeMap(); - Set inconsistentAttributes = new HashSet(); - String rsID = null; + final Set alleles = new TreeSet(); + final Set filters = new TreeSet(); + final Map attributes = new TreeMap(); + final Set inconsistentAttributes = new HashSet(); + final Set variantSources = new HashSet(); // contains the set of sources we found in our set of VCs that are variant + final Set rsIDs = new LinkedHashSet(1); // most of the time there's one id + + GenomeLoc loc = getLocation(genomeLocParser,first); int depth = 0; int maxAC = -1; - Map 
attributesWithMaxAC = new TreeMap(); + final Map attributesWithMaxAC = new TreeMap(); + double negLog10PError = -1; VariantContext vcWithMaxAC = null; + Map genotypes = new TreeMap(); // counting the number of filtered and variant VCs - int nFiltered = 0, nVariant = 0; + int nFiltered = 0; - Allele refAllele = determineReferenceAllele(VCs); boolean remapped = false; // cycle through and add info from the other VCs, making sure the loc/reference matches @@ -546,7 +529,7 @@ public class VariantContextUtils { loc = getLocation(genomeLocParser,vc); // get the longest location nFiltered += vc.isFiltered() ? 1 : 0; - nVariant += vc.isVariant() ? 1 : 0; + if ( vc.isVariant() ) variantSources.add(vc.getSource()); AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles); remapped = remapped || alleleMapping.needsRemapping(); @@ -566,8 +549,7 @@ public class VariantContextUtils { // if (vc.hasAttribute(VCFConstants.DEPTH_KEY)) depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0); - if (rsID == null && vc.hasID()) - rsID = vc.getID(); + if ( vc.hasID() && ! 
vc.getID().equals(VCFConstants.EMPTY_ID_FIELD) ) rsIDs.add(vc.getID()); if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) { String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null); // lets see if the string contains a , separator @@ -627,17 +609,16 @@ public class VariantContextUtils { if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() ) filters.clear(); - // we care about where the call came from - if ( annotateOrigin ) { + if ( annotateOrigin ) { // we care about where the call came from String setValue; - if ( nFiltered == 0 && nVariant == priorityListOfVCs.size() ) // nothing was unfiltered + if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was filtered and the number of distinct variant sources equals the priority list size setValue = "Intersection"; else if ( nFiltered == VCs.size() ) // everything was filtered out setValue = "FilteredInAll"; - else if ( nVariant == 0 ) // everyone was reference + else if ( variantSources.isEmpty() ) // everyone was reference setValue = "ReferenceInAll"; - else { // we are filtered in some subset - List s = new ArrayList(); + else { + LinkedHashSet s = new LinkedHashSet(); for ( VariantContext vc : VCs ) if ( vc.isVariant() ) s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() ); @@ -652,8 +633,10 @@ public class VariantContextUtils { if ( depth > 0 ) attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth)); - if ( rsID != null ) - attributes.put(VariantContext.ID_KEY, rsID); + + if ( ! rsIDs.isEmpty() ) { + attributes.put(VariantContext.ID_KEY, Utils.join(",", rsIDs)); + } VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? 
attributesWithMaxAC : attributes) ); // Trim the padded bases of all alleles if necessary diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java index bae8e99ed..e8799e2ab 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java @@ -56,6 +56,7 @@ public class FeatureManagerUnitTest extends BaseTest { private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf"); private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf"); private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz"); + private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz"); private FeatureManager manager; private GenomeLocParser genomeLocParser; @@ -109,6 +110,7 @@ public class FeatureManagerUnitTest extends BaseTest { new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE); new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE); new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ); + new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_BGZIP); new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null); return FMTest.getTests(FMTest.class); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 35495d797..ac5a87a1f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -96,8 +96,8 @@ public class 
CombineVariantsIntegrationTest extends WalkerTest { @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); } - @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); } - @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); } + @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "c6adeda751cb2a08690dd9202356629f"); } + @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "3a08fd5ee18993dfc8882156ccf5d2e9"); } @Test public void threeWayWithRefs() { WalkerTestSpec spec = new WalkerTestSpec( @@ -131,4 +131,13 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); } @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); } @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); } + + @Test + public void combineDBSNPDuplicateSites() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132, + 1, + Arrays.asList("")); + executeTest("combineDBSNPDuplicateSites:", spec); + } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java index 2e4dac6da..67943ccb4 100644 --- a/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java +++ 
b/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java @@ -172,6 +172,17 @@ public class BAQUnitTest extends BaseTest { } } + @Test(enabled = true) + public void testBAQQualRange() { + BAQ baq = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters + final byte ref = (byte)'A'; + final byte alt = (byte)'A'; + + for ( int i = 0; i <= SAMUtils.MAX_PHRED_SCORE; i++ ) + Assert.assertTrue(baq.calcEpsilon( ref, alt, (byte)i) >= 0.0, "Failed to get baq epsilon range"); + } + + public void testBAQ(BAQTest test, boolean lookupWithFasta) { BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters diff --git a/public/testdata/HiSeq.10000.bgzip.vcf.gz b/public/testdata/HiSeq.10000.bgzip.vcf.gz new file mode 100644 index 000000000..3f2b9bf14 Binary files /dev/null and b/public/testdata/HiSeq.10000.bgzip.vcf.gz differ