diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 2af29ea70..7e96b609e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -36,6 +36,8 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.GATKDocUtils; +import org.broadinstitute.sting.utils.help.GATKDoclet; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; @@ -175,12 +177,8 @@ public class CommandLineGATK extends CommandLineExecutable { StringBuilder additionalHelp = new StringBuilder(); Formatter formatter = new Formatter(additionalHelp); - formatter.format("Description:%n"); - - WalkerManager walkerManager = engine.getWalkerManager(); - String walkerHelpText = walkerManager.getWalkerDescriptionText(walkerType); - - printDescriptorLine(formatter,WALKER_INDENT,"",WALKER_INDENT,FIELD_SEPARATOR,walkerHelpText,TextFormattingUtils.DEFAULT_LINE_WIDTH); + formatter.format("For a full description of this walker, see its GATKdocs at:%n"); + formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType)); return additionalHelp.toString(); } @@ -194,8 +192,6 @@ public class CommandLineGATK extends CommandLineExecutable { StringBuilder additionalHelp = new StringBuilder(); Formatter formatter = new Formatter(additionalHelp); - formatter.format("Available analyses:%n"); - // Get the list of walker names from the walker manager. WalkerManager walkerManager = engine.getWalkerManager(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 7426a7726..b7f71c1ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -240,6 +240,14 @@ public class VariantDataManager { if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } + if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) { + // normalize QD by event length for indel case + int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now + if (eventLength > 0) // sanity check + value /= (double)eventLength; + + } + if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } } catch( Exception e ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java index e1a815913..28b279ccd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java @@ -50,7 +50,7 @@ public class VariantRecalibratorArgumentCollection { @Argument(fullName="numKMeans", shortName="nKM", doc="The number of k-means iterations to perform in order to initialize the means of the Gaussians in the Gaussian mixture model.", required=false) public int NUM_KMEANS_ITERATIONS = 30; @Argument(fullName="stdThreshold", shortName="std", doc="If a variant has annotations more than -std standard deviations away from mean then don't use it for building the Gaussian mixture model.", required=false) - public double STD_THRESHOLD = 8.0; + public double STD_THRESHOLD = 14.0; @Argument(fullName="qualThreshold", shortName="qual", doc="If a known variant has raw QUAL value less than -qual then don't use it for building the Gaussian mixture model.", required=false) public double QUAL_THRESHOLD = 80.0; @Argument(fullName="shrinkage", shortName="shrinkage", doc="The shrinkage parameter in variational Bayes algorithm.", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 0644c669b..0de405d97 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -154,10 +154,10 @@ public class ValidateVariants extends RodWalker { try { switch( type ) { case ALL: - vc.extraStrictValidation(observedRefAllele, rsIDs); + vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs); break; case REF: - vc.validateReferenceBases(observedRefAllele); + vc.validateReferenceBases(observedRefAllele, ref.getBase()); break; case IDS: vc.validateRSIDs(rsIDs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index c9b63878d..2afa315ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -67,6 +67,9 @@ public class VariantsToVCF extends RodWalker { @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false) protected String sampleName = null; + @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false) + protected boolean fixReferenceBase = false; + private Set allowedGenotypeFormatStrings = new HashSet(); private boolean wroteHeader = false; @@ -104,6 +107,10 @@ public class VariantsToVCF extends RodWalker { vc = VariantContext.modifyGenotypes(vc, genotypes); } + // todo - fix me. This may not be the cleanest way to handle features what need correct indel padding + if (fixReferenceBase) { + vc = new VariantContext("Variant",vc.getChr(),vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.getFilters(),vc.getAttributes(), ref.getBase()); + } writeRecord(vc, tracker, ref.getBase()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java index 366df0c3a..ce03c8093 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java @@ -31,29 +31,70 @@ import java.io.*; import java.util.Set; /** - * + * Extend this class to provide a documentation handler for GATKdocs */ public abstract class DocumentedGATKFeatureHandler { private GATKDoclet doclet; + /** + * @return the javadoc RootDoc of this javadoc run + */ protected RootDoc getRootDoc() { return this.doclet.rootDoc; } + /** Set the master doclet driving this handler */ public void setDoclet(GATKDoclet doclet) { this.doclet = doclet; } + /** + * @return the GATKDoclet driving this documentation run + */ public GATKDoclet getDoclet() { return doclet; } - public boolean shouldBeProcessed(ClassDoc doc) { return true; } + /** + * Should return false iff this handler wants GATKDoclet to skip documenting + * this ClassDoc. + * @param doc that is being considered for inclusion in the docs + * @return true if the doclet should document ClassDoc doc + */ + public boolean includeInDocs(ClassDoc doc) { return true; } - public String getDestinationFilename(ClassDoc doc) { - return HelpUtils.getClassName(doc).replace(".", "_") + ".html"; + /** + * Return the flat filename (no paths) that the handler would like the Doclet to + * write out the documentation for ClassDoc doc and its associated Class clazz + * @param doc + * @param clazz + * @return + */ + public String getDestinationFilename(ClassDoc doc, Class clazz) { + return GATKDocUtils.htmlFilenameForClass(clazz); } + /** + * Return the name of the FreeMarker template we will use to process ClassDoc doc. + * + * Note this is a flat filename relative to settings/helpTemplates in the GATK source tree + * @param doc + * @return + * @throws IOException + */ public abstract String getTemplateName(ClassDoc doc) throws IOException; + + /** + * Actually generate the documentation map associated with toProcess + * + * Can use all to provide references and rootDoc for additional information, if necessary. + * Implementing methods should end with a call to setHandlerContext on toProcess, as in: + * + * toProcess.setHandlerContent(summary, rootMap); + * + * @param rootDoc + * @param toProcess + * @param all + */ public abstract void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcess, Set all); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java new file mode 100644 index 000000000..8efeecd7b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +/** + * @author depristo + * @since 8/8/11 + */ +public class GATKDocUtils { + private final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gsa/gatkdocs/release/"; + private final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/"; + private final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/"; + + public static String htmlFilenameForClass(Class c) { + return c.getName().replace(".", "_") + ".html"; + } + + public static String helpLinksToGATKDocs(Class c) { + String classPath = htmlFilenameForClass(c); + StringBuilder b = new StringBuilder(); + b.append("release version: ").append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath).append("\n"); + b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n"); + b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n"); + return b.toString(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java index 65c6624d5..1f6db2757 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java @@ -30,19 +30,29 @@ import java.util.HashMap; import java.util.Map; /** -* Created by IntelliJ IDEA. -* User: depristo -* Date: 7/24/11 -* Time: 7:59 PM -* To change this template use File | Settings | File Templates. -*/ -public class GATKDocWorkUnit implements Comparable { - // known at the start - final String name, filename, group; - final DocumentedGATKFeatureHandler handler; - final ClassDoc classDoc; + * Simple collection of all relevant information about something the GATKDoclet can document + * + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/24/11 + * Time: 7:59 PM + */ +class GATKDocWorkUnit implements Comparable { + /** The class that's being documented */ final Class clazz; + /** The name of the thing we are documenting */ + final String name; + /** the filename where we will be writing the docs for this class */ + final String filename; + /** The name of the documentation group (e.g., walkers, read filters) class belongs to */ + final String group; + /** The documentation handler for this class */ + final DocumentedGATKFeatureHandler handler; + /** The javadoc documentation for clazz */ + final ClassDoc classDoc; + /** The annotation that lead to this Class being in GATKDoc */ final DocumentedGATKFeature annotation; + /** When was this walker built, and what's the absolute version number */ final String buildTimestamp, absoluteVersion; // set by the handler @@ -64,12 +74,21 @@ public class GATKDocWorkUnit implements Comparable { this.absoluteVersion = absoluteVersion; } + /** + * Called by the GATKDoclet to set handler provided context for this work unit + * @param summary + * @param forTemplate + */ public void setHandlerContent(String summary, Map forTemplate) { this.summary = summary; this.forTemplate = forTemplate; } - public Map toMap() { + /** + * Return a String -> String map suitable for FreeMarker to create an index to this WorkUnit + * @return + */ + public Map indexDataMap() { Map data = new HashMap(); data.put("name", name); data.put("summary", summary); @@ -78,6 +97,11 @@ public class GATKDocWorkUnit implements Comparable { return data; } + /** + * Sort in order of the name of this WorkUnit + * @param other + * @return + */ public int compareTo(GATKDocWorkUnit other) { return this.name.compareTo(other.name); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java index 49214237a..f278e593d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -101,9 +101,9 @@ public class GATKDoclet { DocumentedGATKFeature feature = getFeatureForClassDoc(doc); DocumentedGATKFeatureHandler handler = createHandler(doc, feature); - if ( handler != null && handler.shouldBeProcessed(doc) ) { + if ( handler != null && handler.includeInDocs(doc) ) { logger.info("Going to generate documentation for class " + doc); - String filename = handler.getDestinationFilename(doc); + String filename = handler.getDestinationFilename(doc, clazz); GATKDocWorkUnit unit = new GATKDocWorkUnit(doc.name(), filename, feature.groupName(), feature, handler, doc, clazz, @@ -220,7 +220,7 @@ public class GATKDoclet { Set docFeatures = new HashSet(); List> data = new ArrayList>(); for ( GATKDocWorkUnit workUnit : indexData ) { - data.add(workUnit.toMap()); + data.add(workUnit.indexDataMap()); docFeatures.add(workUnit.annotation); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index c69345816..3ca24dc35 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -51,7 +51,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { RootDoc rootDoc; @Override - public boolean shouldBeProcessed(ClassDoc doc) { + public boolean includeInDocs(ClassDoc doc) { return true; // try { // Class type = HelpUtils.getClassForDoc(doc); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 3ea1bb5d6..fff1961c6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -440,7 +440,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @return vc subcontext */ public VariantContext subContextFromGenotypes(Collection genotypes, Set alleles) { - return new VariantContext(getSource(), contig, start, stop, alleles, genotypes, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes()); + return new VariantContext(getSource(), contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes(), getReferenceBaseForIndel()); } @@ -1055,11 +1055,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati * Run all extra-strict validation tests on a Variant Context object * * @param reference the true reference allele + * @param paddedRefBase the reference base used for padding indels * @param rsIDs the true dbSNP IDs */ - public void extraStrictValidation(Allele reference, Set rsIDs) { + public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set rsIDs) { // validate the reference - validateReferenceBases(reference); + validateReferenceBases(reference, paddedRefBase); // validate the RS IDs validateRSIDs(rsIDs); @@ -1074,11 +1075,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati //checkReferenceTrack(); } - public void validateReferenceBases(Allele reference) { + public void validateReferenceBases(Allele reference, Byte paddedRefBase) { // don't validate if we're an insertion if ( !reference.isNull() && !reference.basesMatch(getReference()) ) { throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString())); } + + // we also need to validate the padding base for simple indels + if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) + throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), (char)getReferenceBaseForIndel().byteValue(), (char)paddedRefBase.byteValue())); } public void validateRSIDs(Set rsIDs) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java index f5ed69476..60312dbd2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java @@ -17,7 +17,7 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest { executeTest("test standard", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - "-T RealignerTargetCreator -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", + "-T RealignerTargetCreator -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_129_b36.vcf -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", 1, Arrays.asList("0367d39a122c8ac0899fb868a82ef728")); executeTest("test dbsnp", spec2); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 049f44845..e81d2670c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -30,7 +30,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -T CountCovariates" + " -I " + bam + ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) @@ -97,7 +97,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -standard" + " -OQ" + " -recalFile %s" + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf", + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf", 1, // just one output file Arrays.asList(md5)); executeTest("testCountCovariatesUseOriginalQuals", spec); @@ -144,7 +144,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -T CountCovariates" + " -I " + bam + " -standard" + @@ -249,7 +249,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -L 1:10,000,000-10,200,000" + " -cov ReadGroupCovariate" + " -cov QualityScoreCovariate" + @@ -275,7 +275,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -T CountCovariates" + " -I " + bam + " -cov ReadGroupCovariate" + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java index f89b80ead..43ea401f7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java @@ -16,7 +16,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L chr1:1-50,000,000" + " -standard" + " -OQ" + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_hg18.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 057053a1c..3ac7e3785 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { } VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", - "d33212a84368e821cbedecd4f59756d6", // tranches - "4652dca41222bebdf9d9fda343b2a835", // recal file - "243a397a33a935fcaccd5deb6d16f0c0"); // cut VCF + "0ddd1e0e483d2eaf56004615cea23ec7", // tranches + "58780f63182e139fdbe17f6c18b5b774", // recal file + "f67d844b6252a55452cf4167b77530b1"); // cut VCF @DataProvider(name = "VRTest") public Object[][] createData1() { diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 959d073c7..47ba0220f 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -2,7 +2,6 @@ package org.broadinstitute.sting.queue.qscripts import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.QScript -import org.broadinstitute.sting.queue.function.ListWriterFunction import org.broadinstitute.sting.queue.extensions.picard._ import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel import org.broadinstitute.sting.utils.baq.BAQ.CalculationMode @@ -12,6 +11,7 @@ import net.sf.samtools.SAMFileReader import net.sf.samtools.SAMFileHeader.SortOrder import org.broadinstitute.sting.queue.util.QScriptUtils +import org.broadinstitute.sting.queue.function.{CommandLineFunction, ListWriterFunction} class DataProcessingPipeline extends QScript { qscript => @@ -283,12 +283,6 @@ class DataProcessingPipeline extends QScript { ****************************************************************************/ - // General arguments to GATK walkers - trait CommandLineGATKArgs extends CommandLineGATK { - this.reference_sequence = qscript.reference - this.memoryLimit = 4 - this.isIntermediate = true - } // General arguments to non-GATK tools trait ExternalCommonArgs extends CommandLineFunction { @@ -296,6 +290,14 @@ class DataProcessingPipeline extends QScript { this.isIntermediate = true } + // General arguments to GATK walkers + trait CommandLineGATKArgs extends CommandLineGATK with ExternalCommonArgs { + this.reference_sequence = qscript.reference + } + + trait SAMargs extends PicardBamFunction with ExternalCommonArgs { + this.maxRecordsInRam = 100000 + } case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs { if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) @@ -303,7 +305,7 @@ class DataProcessingPipeline extends QScript { this.out = outIntervals this.mismatchFraction = 0.0 this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) - if (!indels.isEmpty) + if (indels != null) this.rodBind :+= RodBind("indels", "VCF", indels) this.scatterCount = nContigs this.analysisName = queueLogDir + outIntervals + ".target" @@ -311,11 +313,12 @@ class DataProcessingPipeline extends QScript { } case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input_file :+= inBams this.targetIntervals = tIntervals this.out = outBam this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) - if (!qscript.indels.isEmpty) + if (qscript.indels != null) this.rodBind :+= RodBind("indels", "VCF", qscript.indels) this.consensusDeterminationModel = consensusDeterminationModel this.compress = 0 @@ -393,7 +396,6 @@ class DataProcessingPipeline extends QScript { case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs { this.input = List(inBam) this.output = outLog - this.maxRecordsInRam = 100000 this.REFERENCE_SEQUENCE = qscript.reference this.isIntermediate = false this.analysisName = queueLogDir + outLog + ".validate" @@ -412,8 +414,6 @@ class DataProcessingPipeline extends QScript { this.RGPL = readGroup.pl this.RGPU = readGroup.pu this.RGSM = readGroup.sm - this.memoryLimit = 4 - this.isIntermediate = true this.analysisName = queueLogDir + outBam + ".rg" this.jobName = queueLogDir + outBam + ".rg" } @@ -439,6 +439,7 @@ class DataProcessingPipeline extends QScript { @Input(doc="bwa alignment index file") var sai = inSai @Output(doc="output aligned bam file") var alignedBam = outBam def commandLine = bwaPath + " samse " + reference + " " + sai + " " + bam + " > " + alignedBam + this.memoryLimit = 6 this.analysisName = queueLogDir + outBam + ".bwa_sam_se" this.jobName = queueLogDir + outBam + ".bwa_sam_se" } @@ -449,6 +450,7 @@ class DataProcessingPipeline extends QScript { @Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2 @Output(doc="output aligned bam file") var alignedBam = outBam def commandLine = bwaPath + " sampe " + reference + " " + sai1 + " " + sai2 + " " + bam + " " + bam + " > " + alignedBam + this.memoryLimit = 6 this.analysisName = queueLogDir + outBam + ".bwa_sam_pe" this.jobName = queueLogDir + outBam + ".bwa_sam_pe" } @@ -459,6 +461,4 @@ class DataProcessingPipeline extends QScript { this.analysisName = queueLogDir + outBamList + ".bamList" this.jobName = queueLogDir + outBamList + ".bamList" } - - } diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 934cf2a3c..59c00b8cd 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -13,7 +13,7 @@ class GATKResourcesBundle extends QScript { var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar") @Argument(doc="liftOverPerl", required=false) - var liftOverPerl: File = new File("./perl/liftOverVCF.pl") + var liftOverPerl: File = new File("./public/perl/liftOverVCF.pl") @Argument(shortName = "ver", doc="The SVN version of this release", required=true) var VERSION: String = _ @@ -57,11 +57,11 @@ class GATKResourcesBundle extends QScript { //Console.printf("liftover(%s => %s)%n", inRef.name, outRef.name) (inRef.name, outRef.name) match { case ("b37", "hg19") => - return new LiftOverPerl(in, out, new File("chainFiles/b37tohg19.chain"), inRef, outRef) + return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg19.chain"), inRef, outRef) case ("b37", "hg18") => - return new LiftOverPerl(in, out, new File("chainFiles/b37tohg18.chain"), inRef, outRef) + return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg18.chain"), inRef, outRef) case ("b37", "b36") => - return new LiftOverPerl(in, out, new File("chainFiles/b37tob36.chain"), inRef, outRef) + return new LiftOverPerl(in, out, new File("public/chainFiles/b37tob36.chain"), inRef, outRef) case _ => return null } } @@ -85,7 +85,7 @@ class GATKResourcesBundle extends QScript { // b37 = new Reference("b37", new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta")) hg18 = new Reference("hg18", new File("/Users/depristo/Desktop/broadLocal/localData/Homo_sapiens_assembly18.fasta")) - exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta")) + exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta")) refs = List(b37, hg18, exampleFASTA) val DATAROOT = "/Users/depristo/Desktop/broadLocal/localData/" @@ -94,7 +94,7 @@ class GATKResourcesBundle extends QScript { addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false)) addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false)) - addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) + addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) } def initializeStandardDataFiles() = { @@ -105,7 +105,7 @@ class GATKResourcesBundle extends QScript { b37 = new Reference("b37", new File("/humgen/1kg/reference/human_g1k_v37.fasta")) hg18 = new Reference("hg18", new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")) b36 = new Reference("b36", new File("/humgen/1kg/reference/human_b36_both.fasta")) - exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta")) + exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta")) refs = List(hg19, b37, hg18, b36, exampleFASTA) addResource(new Resource(b37.file, "", b37, false)) @@ -134,6 +134,9 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf", "1000G_indels_for_realignment", b37, true, false)) + addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf", + "indels_mills_devine", b37, true, true)) + // // example call set for wiki tutorial // @@ -152,8 +155,8 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/refGene_b37.sorted.txt", "refGene", b37, true, false)) - addResource(new Resource("chainFiles/hg18tob37.chain", "", hg18, false, false)) - addResource(new Resource("chainFiles/b36tob37.chain", "", b36, false, false)) + addResource(new Resource("public/chainFiles/hg18tob37.chain", "", hg18, false, false)) + addResource(new Resource("public/chainFiles/b36tob37.chain", "", b36, false, false)) // todo -- chain files? // todo 1000G SNP and indel call sets? @@ -162,7 +165,7 @@ class GATKResourcesBundle extends QScript { // exampleFASTA file // addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false)) - addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) + addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) } def createBundleDirectories(dir: File) = { diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala index 128d8773c..03f9d3315 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala @@ -52,7 +52,7 @@ class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRu updateStatus(RunnerStatus.RUNNING) job.run() - updateStatus(RunnerStatus.FAILED) + updateStatus(RunnerStatus.DONE) } override def checkUnknownStatus() {}