Merge branch 'master' of ssh://gsa1.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2011-08-08 13:08:25 -04:00
commit 1a0e5ab4ba
18 changed files with 202 additions and 70 deletions

View File

@ -36,6 +36,8 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.GATKDocUtils;
import org.broadinstitute.sting.utils.help.GATKDoclet;
import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.util.*; import java.util.*;
@ -175,12 +177,8 @@ public class CommandLineGATK extends CommandLineExecutable {
StringBuilder additionalHelp = new StringBuilder(); StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp); Formatter formatter = new Formatter(additionalHelp);
formatter.format("Description:%n"); formatter.format("For a full description of this walker, see its GATKdocs at:%n");
formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType));
WalkerManager walkerManager = engine.getWalkerManager();
String walkerHelpText = walkerManager.getWalkerDescriptionText(walkerType);
printDescriptorLine(formatter,WALKER_INDENT,"",WALKER_INDENT,FIELD_SEPARATOR,walkerHelpText,TextFormattingUtils.DEFAULT_LINE_WIDTH);
return additionalHelp.toString(); return additionalHelp.toString();
} }
@ -194,8 +192,6 @@ public class CommandLineGATK extends CommandLineExecutable {
StringBuilder additionalHelp = new StringBuilder(); StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp); Formatter formatter = new Formatter(additionalHelp);
formatter.format("Available analyses:%n");
// Get the list of walker names from the walker manager. // Get the list of walker names from the walker manager.
WalkerManager walkerManager = engine.getWalkerManager(); WalkerManager walkerManager = engine.getWalkerManager();

View File

@ -240,6 +240,14 @@ public class VariantDataManager {
if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
} }
if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) {
// normalize QD by event length for indel case
int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now
if (eventLength > 0) // sanity check
value /= (double)eventLength;
}
if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
} catch( Exception e ) { } catch( Exception e ) {

View File

@ -50,7 +50,7 @@ public class VariantRecalibratorArgumentCollection {
@Argument(fullName="numKMeans", shortName="nKM", doc="The number of k-means iterations to perform in order to initialize the means of the Gaussians in the Gaussian mixture model.", required=false) @Argument(fullName="numKMeans", shortName="nKM", doc="The number of k-means iterations to perform in order to initialize the means of the Gaussians in the Gaussian mixture model.", required=false)
public int NUM_KMEANS_ITERATIONS = 30; public int NUM_KMEANS_ITERATIONS = 30;
@Argument(fullName="stdThreshold", shortName="std", doc="If a variant has annotations more than -std standard deviations away from mean then don't use it for building the Gaussian mixture model.", required=false) @Argument(fullName="stdThreshold", shortName="std", doc="If a variant has annotations more than -std standard deviations away from mean then don't use it for building the Gaussian mixture model.", required=false)
public double STD_THRESHOLD = 8.0; public double STD_THRESHOLD = 14.0;
@Argument(fullName="qualThreshold", shortName="qual", doc="If a known variant has raw QUAL value less than -qual then don't use it for building the Gaussian mixture model.", required=false) @Argument(fullName="qualThreshold", shortName="qual", doc="If a known variant has raw QUAL value less than -qual then don't use it for building the Gaussian mixture model.", required=false)
public double QUAL_THRESHOLD = 80.0; public double QUAL_THRESHOLD = 80.0;
@Argument(fullName="shrinkage", shortName="shrinkage", doc="The shrinkage parameter in variational Bayes algorithm.", required=false) @Argument(fullName="shrinkage", shortName="shrinkage", doc="The shrinkage parameter in variational Bayes algorithm.", required=false)

View File

@ -154,10 +154,10 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
try { try {
switch( type ) { switch( type ) {
case ALL: case ALL:
vc.extraStrictValidation(observedRefAllele, rsIDs); vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs);
break; break;
case REF: case REF:
vc.validateReferenceBases(observedRefAllele); vc.validateReferenceBases(observedRefAllele, ref.getBase());
break; break;
case IDS: case IDS:
vc.validateRSIDs(rsIDs); vc.validateRSIDs(rsIDs);

View File

@ -67,6 +67,9 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
@Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false) @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false)
protected String sampleName = null; protected String sampleName = null;
@Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false)
protected boolean fixReferenceBase = false;
private Set<String> allowedGenotypeFormatStrings = new HashSet<String>(); private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
private boolean wroteHeader = false; private boolean wroteHeader = false;
@ -104,6 +107,10 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
vc = VariantContext.modifyGenotypes(vc, genotypes); vc = VariantContext.modifyGenotypes(vc, genotypes);
} }
// todo - fix me. This may not be the cleanest way to handle features what need correct indel padding
if (fixReferenceBase) {
vc = new VariantContext("Variant",vc.getChr(),vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.getFilters(),vc.getAttributes(), ref.getBase());
}
writeRecord(vc, tracker, ref.getBase()); writeRecord(vc, tracker, ref.getBase());
} }

View File

@ -31,29 +31,70 @@ import java.io.*;
import java.util.Set; import java.util.Set;
/** /**
* * Extend this class to provide a documentation handler for GATKdocs
*/ */
public abstract class DocumentedGATKFeatureHandler { public abstract class DocumentedGATKFeatureHandler {
private GATKDoclet doclet; private GATKDoclet doclet;
/**
* @return the javadoc RootDoc of this javadoc run
*/
protected RootDoc getRootDoc() { protected RootDoc getRootDoc() {
return this.doclet.rootDoc; return this.doclet.rootDoc;
} }
/** Set the master doclet driving this handler */
public void setDoclet(GATKDoclet doclet) { public void setDoclet(GATKDoclet doclet) {
this.doclet = doclet; this.doclet = doclet;
} }
/**
* @return the GATKDoclet driving this documentation run
*/
public GATKDoclet getDoclet() { public GATKDoclet getDoclet() {
return doclet; return doclet;
} }
public boolean shouldBeProcessed(ClassDoc doc) { return true; } /**
* Should return false iff this handler wants GATKDoclet to skip documenting
* this ClassDoc.
* @param doc that is being considered for inclusion in the docs
* @return true if the doclet should document ClassDoc doc
*/
public boolean includeInDocs(ClassDoc doc) { return true; }
public String getDestinationFilename(ClassDoc doc) { /**
return HelpUtils.getClassName(doc).replace(".", "_") + ".html"; * Return the flat filename (no paths) that the handler would like the Doclet to
* write out the documentation for ClassDoc doc and its associated Class clazz
* @param doc
* @param clazz
* @return
*/
public String getDestinationFilename(ClassDoc doc, Class clazz) {
return GATKDocUtils.htmlFilenameForClass(clazz);
} }
/**
* Return the name of the FreeMarker template we will use to process ClassDoc doc.
*
* Note this is a flat filename relative to settings/helpTemplates in the GATK source tree
* @param doc
* @return
* @throws IOException
*/
public abstract String getTemplateName(ClassDoc doc) throws IOException; public abstract String getTemplateName(ClassDoc doc) throws IOException;
/**
* Actually generate the documentation map associated with toProcess
*
* Can use all to provide references and rootDoc for additional information, if necessary.
* Implementing methods should end with a call to setHandlerContext on toProcess, as in:
*
* toProcess.setHandlerContent(summary, rootMap);
*
* @param rootDoc
* @param toProcess
* @param all
*/
public abstract void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcess, Set<GATKDocWorkUnit> all); public abstract void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcess, Set<GATKDocWorkUnit> all);
} }

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.help;
/**
* @author depristo
* @since 8/8/11
*/
public class GATKDocUtils {
    // Web roots where the three flavors of GATKdocs are published.
    // Note: stable/unstable intentionally point at the internal (iwww) host.
    private final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gsa/gatkdocs/release/";
    private final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/";
    private final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/";

    /** Static utility class; prevent instantiation. */
    private GATKDocUtils() {}

    /**
     * Returns the flat HTML filename under which the GATKdocs for class {@code c}
     * are written: the fully-qualified class name with '.' replaced by '_',
     * plus an ".html" extension (e.g. "org_broad_Foo.html" for org.broad.Foo).
     *
     * @param c the class being documented; must not be null
     * @return the flat (path-free) HTML filename for {@code c}'s docs page
     */
    public static String htmlFilenameForClass(Class<?> c) {
        return c.getName().replace(".", "_") + ".html";
    }

    /**
     * Builds a human-readable, newline-terminated list of URLs pointing at the
     * release, stable, and unstable GATKdocs pages for class {@code c}, suitable
     * for inclusion in command-line help text.
     *
     * @param c the class whose documentation links are requested; must not be null
     * @return three labeled "version: URL" lines, each ending in '\n'
     */
    public static String helpLinksToGATKDocs(Class<?> c) {
        final String classPath = htmlFilenameForClass(c);
        StringBuilder b = new StringBuilder();
        b.append("release version: ").append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath).append("\n");
        b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n");
        b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n");
        return b.toString();
    }
}

View File

@ -30,19 +30,29 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
/** /**
* Simple collection of all relevant information about something the GATKDoclet can document
*
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
* User: depristo * User: depristo
* Date: 7/24/11 * Date: 7/24/11
* Time: 7:59 PM * Time: 7:59 PM
* To change this template use File | Settings | File Templates.
*/ */
public class GATKDocWorkUnit implements Comparable<GATKDocWorkUnit> { class GATKDocWorkUnit implements Comparable<GATKDocWorkUnit> {
// known at the start /** The class that's being documented */
final String name, filename, group;
final DocumentedGATKFeatureHandler handler;
final ClassDoc classDoc;
final Class clazz; final Class clazz;
/** The name of the thing we are documenting */
final String name;
/** the filename where we will be writing the docs for this class */
final String filename;
/** The name of the documentation group (e.g., walkers, read filters) class belongs to */
final String group;
/** The documentation handler for this class */
final DocumentedGATKFeatureHandler handler;
/** The javadoc documentation for clazz */
final ClassDoc classDoc;
/** The annotation that lead to this Class being in GATKDoc */
final DocumentedGATKFeature annotation; final DocumentedGATKFeature annotation;
/** When was this walker built, and what's the absolute version number */
final String buildTimestamp, absoluteVersion; final String buildTimestamp, absoluteVersion;
// set by the handler // set by the handler
@ -64,12 +74,21 @@ public class GATKDocWorkUnit implements Comparable<GATKDocWorkUnit> {
this.absoluteVersion = absoluteVersion; this.absoluteVersion = absoluteVersion;
} }
/**
* Called by the GATKDoclet to set handler provided context for this work unit
* @param summary
* @param forTemplate
*/
public void setHandlerContent(String summary, Map<String, Object> forTemplate) { public void setHandlerContent(String summary, Map<String, Object> forTemplate) {
this.summary = summary; this.summary = summary;
this.forTemplate = forTemplate; this.forTemplate = forTemplate;
} }
public Map<String, String> toMap() { /**
* Return a String -> String map suitable for FreeMarker to create an index to this WorkUnit
* @return
*/
public Map<String, String> indexDataMap() {
Map<String, String> data = new HashMap<String, String>(); Map<String, String> data = new HashMap<String, String>();
data.put("name", name); data.put("name", name);
data.put("summary", summary); data.put("summary", summary);
@ -78,6 +97,11 @@ public class GATKDocWorkUnit implements Comparable<GATKDocWorkUnit> {
return data; return data;
} }
/**
* Sort in order of the name of this WorkUnit
* @param other
* @return
*/
public int compareTo(GATKDocWorkUnit other) { public int compareTo(GATKDocWorkUnit other) {
return this.name.compareTo(other.name); return this.name.compareTo(other.name);
} }

View File

@ -101,9 +101,9 @@ public class GATKDoclet {
DocumentedGATKFeature feature = getFeatureForClassDoc(doc); DocumentedGATKFeature feature = getFeatureForClassDoc(doc);
DocumentedGATKFeatureHandler handler = createHandler(doc, feature); DocumentedGATKFeatureHandler handler = createHandler(doc, feature);
if ( handler != null && handler.shouldBeProcessed(doc) ) { if ( handler != null && handler.includeInDocs(doc) ) {
logger.info("Going to generate documentation for class " + doc); logger.info("Going to generate documentation for class " + doc);
String filename = handler.getDestinationFilename(doc); String filename = handler.getDestinationFilename(doc, clazz);
GATKDocWorkUnit unit = new GATKDocWorkUnit(doc.name(), GATKDocWorkUnit unit = new GATKDocWorkUnit(doc.name(),
filename, feature.groupName(), filename, feature.groupName(),
feature, handler, doc, clazz, feature, handler, doc, clazz,
@ -220,7 +220,7 @@ public class GATKDoclet {
Set<DocumentedGATKFeature> docFeatures = new HashSet<DocumentedGATKFeature>(); Set<DocumentedGATKFeature> docFeatures = new HashSet<DocumentedGATKFeature>();
List<Map<String, String>> data = new ArrayList<Map<String, String>>(); List<Map<String, String>> data = new ArrayList<Map<String, String>>();
for ( GATKDocWorkUnit workUnit : indexData ) { for ( GATKDocWorkUnit workUnit : indexData ) {
data.add(workUnit.toMap()); data.add(workUnit.indexDataMap());
docFeatures.add(workUnit.annotation); docFeatures.add(workUnit.annotation);
} }

View File

@ -51,7 +51,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
RootDoc rootDoc; RootDoc rootDoc;
@Override @Override
public boolean shouldBeProcessed(ClassDoc doc) { public boolean includeInDocs(ClassDoc doc) {
return true; return true;
// try { // try {
// Class type = HelpUtils.getClassForDoc(doc); // Class type = HelpUtils.getClassForDoc(doc);

View File

@ -440,7 +440,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @return vc subcontext * @return vc subcontext
*/ */
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Set<Allele> alleles) { public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Set<Allele> alleles) {
return new VariantContext(getSource(), contig, start, stop, alleles, genotypes, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes()); return new VariantContext(getSource(), contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes(), getReferenceBaseForIndel());
} }
@ -1055,11 +1055,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* Run all extra-strict validation tests on a Variant Context object * Run all extra-strict validation tests on a Variant Context object
* *
* @param reference the true reference allele * @param reference the true reference allele
* @param paddedRefBase the reference base used for padding indels
* @param rsIDs the true dbSNP IDs * @param rsIDs the true dbSNP IDs
*/ */
public void extraStrictValidation(Allele reference, Set<String> rsIDs) { public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set<String> rsIDs) {
// validate the reference // validate the reference
validateReferenceBases(reference); validateReferenceBases(reference, paddedRefBase);
// validate the RS IDs // validate the RS IDs
validateRSIDs(rsIDs); validateRSIDs(rsIDs);
@ -1074,11 +1075,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
//checkReferenceTrack(); //checkReferenceTrack();
} }
public void validateReferenceBases(Allele reference) { public void validateReferenceBases(Allele reference, Byte paddedRefBase) {
// don't validate if we're an insertion // don't validate if we're an insertion
if ( !reference.isNull() && !reference.basesMatch(getReference()) ) { if ( !reference.isNull() && !reference.basesMatch(getReference()) ) {
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString())); throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
} }
// we also need to validate the padding base for simple indels
if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) )
throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), (char)getReferenceBaseForIndel().byteValue(), (char)paddedRefBase.byteValue()));
} }
public void validateRSIDs(Set<String> rsIDs) { public void validateRSIDs(Set<String> rsIDs) {

View File

@ -17,7 +17,7 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
executeTest("test standard", spec1); executeTest("test standard", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T RealignerTargetCreator -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", "-T RealignerTargetCreator -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_129_b36.vcf -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s",
1, 1,
Arrays.asList("0367d39a122c8ac0899fb868a82ef728")); Arrays.asList("0367d39a122c8ac0899fb868a82ef728"));
executeTest("test dbsnp", spec2); executeTest("test dbsnp", spec2);

View File

@ -30,7 +30,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference + "-R " + b36KGReference +
" -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" +
" -T CountCovariates" + " -T CountCovariates" +
" -I " + bam + " -I " + bam +
( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" )
@ -97,7 +97,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -standard" + " -standard" +
" -OQ" + " -OQ" +
" -recalFile %s" + " -recalFile %s" +
" -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf", " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf",
1, // just one output file 1, // just one output file
Arrays.asList(md5)); Arrays.asList(md5));
executeTest("testCountCovariatesUseOriginalQuals", spec); executeTest("testCountCovariatesUseOriginalQuals", spec);
@ -144,7 +144,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference + "-R " + b36KGReference +
" -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" +
" -T CountCovariates" + " -T CountCovariates" +
" -I " + bam + " -I " + bam +
" -standard" + " -standard" +
@ -249,7 +249,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" +
" -T CountCovariates" + " -T CountCovariates" +
" -I " + bam + " -I " + bam +
" -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" +
" -L 1:10,000,000-10,200,000" + " -L 1:10,000,000-10,200,000" +
" -cov ReadGroupCovariate" + " -cov ReadGroupCovariate" +
" -cov QualityScoreCovariate" + " -cov QualityScoreCovariate" +
@ -275,7 +275,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference + "-R " + b36KGReference +
" -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" +
" -T CountCovariates" + " -T CountCovariates" +
" -I " + bam + " -I " + bam +
" -cov ReadGroupCovariate" + " -cov ReadGroupCovariate" +

View File

@ -16,7 +16,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest {
" -L chr1:1-50,000,000" + " -L chr1:1-50,000,000" +
" -standard" + " -standard" +
" -OQ" + " -OQ" +
" -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_hg18.vcf" + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
" -recalFile /dev/null" + moreArgs, " -recalFile /dev/null" + moreArgs,
0, 0,
new ArrayList<String>(0)); new ArrayList<String>(0));

View File

@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
} }
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
"d33212a84368e821cbedecd4f59756d6", // tranches "0ddd1e0e483d2eaf56004615cea23ec7", // tranches
"4652dca41222bebdf9d9fda343b2a835", // recal file "58780f63182e139fdbe17f6c18b5b774", // recal file
"243a397a33a935fcaccd5deb6d16f0c0"); // cut VCF "f67d844b6252a55452cf4167b77530b1"); // cut VCF
@DataProvider(name = "VRTest") @DataProvider(name = "VRTest")
public Object[][] createData1() { public Object[][] createData1() {

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.queue.qscripts
import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.function.ListWriterFunction
import org.broadinstitute.sting.queue.extensions.picard._ import org.broadinstitute.sting.queue.extensions.picard._
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
import org.broadinstitute.sting.utils.baq.BAQ.CalculationMode import org.broadinstitute.sting.utils.baq.BAQ.CalculationMode
@ -12,6 +11,7 @@ import net.sf.samtools.SAMFileReader
import net.sf.samtools.SAMFileHeader.SortOrder import net.sf.samtools.SAMFileHeader.SortOrder
import org.broadinstitute.sting.queue.util.QScriptUtils import org.broadinstitute.sting.queue.util.QScriptUtils
import org.broadinstitute.sting.queue.function.{CommandLineFunction, ListWriterFunction}
class DataProcessingPipeline extends QScript { class DataProcessingPipeline extends QScript {
qscript => qscript =>
@ -283,12 +283,6 @@ class DataProcessingPipeline extends QScript {
****************************************************************************/ ****************************************************************************/
// General arguments to GATK walkers
trait CommandLineGATKArgs extends CommandLineGATK {
this.reference_sequence = qscript.reference
this.memoryLimit = 4
this.isIntermediate = true
}
// General arguments to non-GATK tools // General arguments to non-GATK tools
trait ExternalCommonArgs extends CommandLineFunction { trait ExternalCommonArgs extends CommandLineFunction {
@ -296,6 +290,14 @@ class DataProcessingPipeline extends QScript {
this.isIntermediate = true this.isIntermediate = true
} }
// General arguments to GATK walkers
trait CommandLineGATKArgs extends CommandLineGATK with ExternalCommonArgs {
this.reference_sequence = qscript.reference
}
trait SAMargs extends PicardBamFunction with ExternalCommonArgs {
this.maxRecordsInRam = 100000
}
case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs { case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
@ -303,7 +305,7 @@ class DataProcessingPipeline extends QScript {
this.out = outIntervals this.out = outIntervals
this.mismatchFraction = 0.0 this.mismatchFraction = 0.0
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
if (!indels.isEmpty) if (indels != null)
this.rodBind :+= RodBind("indels", "VCF", indels) this.rodBind :+= RodBind("indels", "VCF", indels)
this.scatterCount = nContigs this.scatterCount = nContigs
this.analysisName = queueLogDir + outIntervals + ".target" this.analysisName = queueLogDir + outIntervals + ".target"
@ -311,11 +313,12 @@ class DataProcessingPipeline extends QScript {
} }
case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs { case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input_file :+= inBams this.input_file :+= inBams
this.targetIntervals = tIntervals this.targetIntervals = tIntervals
this.out = outBam this.out = outBam
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
if (!qscript.indels.isEmpty) if (qscript.indels != null)
this.rodBind :+= RodBind("indels", "VCF", qscript.indels) this.rodBind :+= RodBind("indels", "VCF", qscript.indels)
this.consensusDeterminationModel = consensusDeterminationModel this.consensusDeterminationModel = consensusDeterminationModel
this.compress = 0 this.compress = 0
@ -393,7 +396,6 @@ class DataProcessingPipeline extends QScript {
case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs { case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs {
this.input = List(inBam) this.input = List(inBam)
this.output = outLog this.output = outLog
this.maxRecordsInRam = 100000
this.REFERENCE_SEQUENCE = qscript.reference this.REFERENCE_SEQUENCE = qscript.reference
this.isIntermediate = false this.isIntermediate = false
this.analysisName = queueLogDir + outLog + ".validate" this.analysisName = queueLogDir + outLog + ".validate"
@ -412,8 +414,6 @@ class DataProcessingPipeline extends QScript {
this.RGPL = readGroup.pl this.RGPL = readGroup.pl
this.RGPU = readGroup.pu this.RGPU = readGroup.pu
this.RGSM = readGroup.sm this.RGSM = readGroup.sm
this.memoryLimit = 4
this.isIntermediate = true
this.analysisName = queueLogDir + outBam + ".rg" this.analysisName = queueLogDir + outBam + ".rg"
this.jobName = queueLogDir + outBam + ".rg" this.jobName = queueLogDir + outBam + ".rg"
} }
@ -439,6 +439,7 @@ class DataProcessingPipeline extends QScript {
@Input(doc="bwa alignment index file") var sai = inSai @Input(doc="bwa alignment index file") var sai = inSai
@Output(doc="output aligned bam file") var alignedBam = outBam @Output(doc="output aligned bam file") var alignedBam = outBam
def commandLine = bwaPath + " samse " + reference + " " + sai + " " + bam + " > " + alignedBam def commandLine = bwaPath + " samse " + reference + " " + sai + " " + bam + " > " + alignedBam
this.memoryLimit = 6
this.analysisName = queueLogDir + outBam + ".bwa_sam_se" this.analysisName = queueLogDir + outBam + ".bwa_sam_se"
this.jobName = queueLogDir + outBam + ".bwa_sam_se" this.jobName = queueLogDir + outBam + ".bwa_sam_se"
} }
@ -449,6 +450,7 @@ class DataProcessingPipeline extends QScript {
@Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2 @Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2
@Output(doc="output aligned bam file") var alignedBam = outBam @Output(doc="output aligned bam file") var alignedBam = outBam
def commandLine = bwaPath + " sampe " + reference + " " + sai1 + " " + sai2 + " " + bam + " " + bam + " > " + alignedBam def commandLine = bwaPath + " sampe " + reference + " " + sai1 + " " + sai2 + " " + bam + " " + bam + " > " + alignedBam
this.memoryLimit = 6
this.analysisName = queueLogDir + outBam + ".bwa_sam_pe" this.analysisName = queueLogDir + outBam + ".bwa_sam_pe"
this.jobName = queueLogDir + outBam + ".bwa_sam_pe" this.jobName = queueLogDir + outBam + ".bwa_sam_pe"
} }
@ -459,6 +461,4 @@ class DataProcessingPipeline extends QScript {
this.analysisName = queueLogDir + outBamList + ".bamList" this.analysisName = queueLogDir + outBamList + ".bamList"
this.jobName = queueLogDir + outBamList + ".bamList" this.jobName = queueLogDir + outBamList + ".bamList"
} }
} }

View File

@ -13,7 +13,7 @@ class GATKResourcesBundle extends QScript {
var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar") var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar")
@Argument(doc="liftOverPerl", required=false) @Argument(doc="liftOverPerl", required=false)
var liftOverPerl: File = new File("./perl/liftOverVCF.pl") var liftOverPerl: File = new File("./public/perl/liftOverVCF.pl")
@Argument(shortName = "ver", doc="The SVN version of this release", required=true) @Argument(shortName = "ver", doc="The SVN version of this release", required=true)
var VERSION: String = _ var VERSION: String = _
@ -57,11 +57,11 @@ class GATKResourcesBundle extends QScript {
//Console.printf("liftover(%s => %s)%n", inRef.name, outRef.name) //Console.printf("liftover(%s => %s)%n", inRef.name, outRef.name)
(inRef.name, outRef.name) match { (inRef.name, outRef.name) match {
case ("b37", "hg19") => case ("b37", "hg19") =>
return new LiftOverPerl(in, out, new File("chainFiles/b37tohg19.chain"), inRef, outRef) return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg19.chain"), inRef, outRef)
case ("b37", "hg18") => case ("b37", "hg18") =>
return new LiftOverPerl(in, out, new File("chainFiles/b37tohg18.chain"), inRef, outRef) return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg18.chain"), inRef, outRef)
case ("b37", "b36") => case ("b37", "b36") =>
return new LiftOverPerl(in, out, new File("chainFiles/b37tob36.chain"), inRef, outRef) return new LiftOverPerl(in, out, new File("public/chainFiles/b37tob36.chain"), inRef, outRef)
case _ => return null case _ => return null
} }
} }
@ -85,7 +85,7 @@ class GATKResourcesBundle extends QScript {
// //
b37 = new Reference("b37", new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta")) b37 = new Reference("b37", new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta"))
hg18 = new Reference("hg18", new File("/Users/depristo/Desktop/broadLocal/localData/Homo_sapiens_assembly18.fasta")) hg18 = new Reference("hg18", new File("/Users/depristo/Desktop/broadLocal/localData/Homo_sapiens_assembly18.fasta"))
exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta")) exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta"))
refs = List(b37, hg18, exampleFASTA) refs = List(b37, hg18, exampleFASTA)
val DATAROOT = "/Users/depristo/Desktop/broadLocal/localData/" val DATAROOT = "/Users/depristo/Desktop/broadLocal/localData/"
@ -94,7 +94,7 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false)) addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false))
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false)) addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
} }
def initializeStandardDataFiles() = { def initializeStandardDataFiles() = {
@ -105,7 +105,7 @@ class GATKResourcesBundle extends QScript {
b37 = new Reference("b37", new File("/humgen/1kg/reference/human_g1k_v37.fasta")) b37 = new Reference("b37", new File("/humgen/1kg/reference/human_g1k_v37.fasta"))
hg18 = new Reference("hg18", new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")) hg18 = new Reference("hg18", new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"))
b36 = new Reference("b36", new File("/humgen/1kg/reference/human_b36_both.fasta")) b36 = new Reference("b36", new File("/humgen/1kg/reference/human_b36_both.fasta"))
exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta")) exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta"))
refs = List(hg19, b37, hg18, b36, exampleFASTA) refs = List(hg19, b37, hg18, b36, exampleFASTA)
addResource(new Resource(b37.file, "", b37, false)) addResource(new Resource(b37.file, "", b37, false))
@ -134,6 +134,9 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf", addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf",
"1000G_indels_for_realignment", b37, true, false)) "1000G_indels_for_realignment", b37, true, false))
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf",
"indels_mills_devine", b37, true, true))
// //
// example call set for wiki tutorial // example call set for wiki tutorial
// //
@ -152,8 +155,8 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/refGene_b37.sorted.txt", addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/refGene_b37.sorted.txt",
"refGene", b37, true, false)) "refGene", b37, true, false))
addResource(new Resource("chainFiles/hg18tob37.chain", "", hg18, false, false)) addResource(new Resource("public/chainFiles/hg18tob37.chain", "", hg18, false, false))
addResource(new Resource("chainFiles/b36tob37.chain", "", b36, false, false)) addResource(new Resource("public/chainFiles/b36tob37.chain", "", b36, false, false))
// todo -- chain files? // todo -- chain files?
// todo 1000G SNP and indel call sets? // todo 1000G SNP and indel call sets?
@ -162,7 +165,7 @@ class GATKResourcesBundle extends QScript {
// exampleFASTA file // exampleFASTA file
// //
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false)) addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
} }
def createBundleDirectories(dir: File) = { def createBundleDirectories(dir: File) = {

View File

@ -52,7 +52,7 @@ class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRu
updateStatus(RunnerStatus.RUNNING) updateStatus(RunnerStatus.RUNNING)
job.run() job.run()
updateStatus(RunnerStatus.FAILED) updateStatus(RunnerStatus.DONE)
} }
override def checkUnknownStatus() {} override def checkUnknownStatus() {}