diff --git a/.gitignore b/.gitignore index 456794cea..927caf98d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,10 +18,8 @@ queueScatterGather /bar* integrationtests/ public/testdata/onTheFlyOutputTest.vcf -private/testdata/onTheFlyOutputTest.vcf -lib -html -gatkdocs -dist -build -resources +build/ +dist/ +dump/ +lib/ +out/ diff --git a/build.xml b/build.xml index 7e7415f08..c6b1afc56 100644 --- a/build.xml +++ b/build.xml @@ -22,7 +22,9 @@ ~ OTHER DEALINGS IN THE SOFTWARE. --> - + Compile and distribute the Sting toolkit @@ -250,11 +252,14 @@ + + - + + @@ -262,6 +267,15 @@ uri="antlib:org.apache.ivy.ant" classpath="${ivy.jar.dir}/${ivy.jar.file}"/> + + + + + @@ -295,7 +309,7 @@ - + @@ -942,6 +956,28 @@ + + + + + + + + + + + + + + + + + + + diff --git a/intellij_example.tar.bz2 b/intellij_example.tar.bz2 new file mode 100644 index 000000000..bce16045c Binary files /dev/null and b/intellij_example.tar.bz2 differ diff --git a/ivy.xml b/ivy.xml index 0761cb411..1d2f95dc1 100644 --- a/ivy.xml +++ b/ivy.xml @@ -46,7 +46,8 @@ - + + @@ -78,8 +79,8 @@ - - + + diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 1b3e68647..5810bc94f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -181,15 +181,6 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "minimum_del_proportion_to_trigger_variant", shortName = "mindel", doc = "", required = false) private double minIndelProportionToTriggerVariant = 0.05; - /** - * Minimum proportion of indels in a site to trigger a variant region. Anything below this will be - * considered consensus. 
- */ - @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) - private int nContigs = 2; - - - /** * Downsamples the coverage of a variable region approximately (guarantees the minimum to be equal to this). * A value of 0 turns downsampling off. @@ -197,6 +188,14 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false) private int downsampleCoverage = 250; + /** + * Number of chromosomes in the sample (this is used for the polyploid consensus compression). Only + * tested for humans (or organisms with n=2). Use at your own risk! + */ + @Hidden + @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) + private int nContigs = 2; + @Hidden @Argument(fullName = "", shortName = "dl", doc = "", required = false) private int debugLevel = 0; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java index fab26c9d2..e9ed6b153 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java @@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; @@ -225,12 +226,24 @@ public class AFCalcPerformanceTest { final AFCalcTestBuilder testBuilder = new 
AFCalcTestBuilder(call.vc.getNSamples(), 1, AFCalcFactory.Calculation.EXACT_INDEPENDENT, AFCalcTestBuilder.PriorType.human); + logger.info(call); final SimpleTimer timer = new SimpleTimer().start(); final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors()); - call.newNanoTime = timer.getElapsedTimeNano(); - call.newPNonRef = result.getLog10PosteriorOfAFGT0(); - logger.info(call); - logger.info("\t\t" + result); + final long newNanoTime = timer.getElapsedTimeNano(); + if ( call.originalCall.anyPolymorphic(-1) || result.anyPolymorphic(-1) ) { + logger.info("**** ONE IS POLY"); + } + logger.info("\t\t getLog10PosteriorOfAFGT0: " + call.originalCall.getLog10PosteriorOfAFGT0() + " vs " + result.getLog10PosteriorOfAFGT0()); + final double speedup = call.runtime / (1.0 * newNanoTime); + logger.info("\t\t runtime: " + call.runtime + " vs " + newNanoTime + " speedup " + String.format("%.2f", speedup) + "x"); + for ( final Allele a : call.originalCall.getAllelesUsedInGenotyping() ) { + if ( a.isNonReference() ) { + final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : ""; + logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE); + final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? 
" DANGER-POSTERIORS-DIFFERENT" : ""; + logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost); + } + } } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java index b4d105507..cfb67164d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java @@ -45,12 +45,16 @@ public class AFCalcTestBuilder { human } + public int getNumAltAlleles() { + return numAltAlleles; + } + public int getnSamples() { return nSamples; } public AFCalc makeModel() { - return AFCalcFactory.createAFCalc(modelType, nSamples, 4, 4, 2); + return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), getNumAltAlleles(), 2); } public double[] makePriors() { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index 219c36a05..989f06ec5 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -70,12 +70,12 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","7d6f319b9edcb1ff8c290fef150a2df8"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 
3","LSV_INDEL_DISC_NOREF_p3","INDEL","9acfe0019efdc91217ee070acb071228"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","dd02890123e07e7412a49475cb6280f1"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","c1d4dd793f61710a1b1fc5d82803210f"); } @Test(enabled = true) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java new file mode 100644 index 000000000..556b7451f --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java @@ -0,0 +1,87 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class AFCalcPerformanceUnitTest extends BaseTest { + @DataProvider(name = "ScalingTests") + public Object[][] makepolyTestProviderLotsOfAlleles() { + List tests = new ArrayList(); + + // list of all high-quality models in the system + final List biAllelicModels = Arrays.asList( + AFCalcFactory.Calculation.EXACT_INDEPENDENT, + AFCalcFactory.Calculation.EXACT_REFERENCE); + + final List multiAllelicModels = Arrays.asList( + AFCalcFactory.Calculation.EXACT_INDEPENDENT); + +// for ( final int nonTypePLs : Arrays.asList(100) ) { +// for ( final int nSamples : 
Arrays.asList(10000) ) { +// final List alleleCounts = Arrays.asList(50); +// for ( final int nAltAlleles : Arrays.asList(1) ) { + for ( final int nonTypePLs : Arrays.asList(100) ) { + for ( final int nSamples : Arrays.asList(100, 1000) ) { + final List alleleCounts = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 50, 500); + for ( final int nAltAlleles : Arrays.asList(1, 2, 3) ) { + final List models = nAltAlleles > 1 ? multiAllelicModels : biAllelicModels; + for ( final AFCalcFactory.Calculation model : models ) { + for ( final List ACs : Utils.makePermutations(alleleCounts, nAltAlleles, true) ) { + if ( MathUtils.sum(ACs) < nSamples * 2 ) { + final AFCalcTestBuilder testBuilder + = new AFCalcTestBuilder(nSamples, nAltAlleles, model, AFCalcTestBuilder.PriorType.human); + tests.add(new Object[]{testBuilder, ACs, nonTypePLs}); + } + } + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + private Pair estNumberOfEvaluations(final AFCalcTestBuilder testBuilder, final VariantContext vc, final int nonTypePL) { + final int evalOverhead = 2; // 2 + final int maxEvalsPerSamplePerAC = 3; + + int minEvals = 0, maxEvals = 0; + + for ( final Allele alt : vc.getAlternateAlleles() ) { + final int AC = vc.getCalledChrCount(alt); + minEvals += AC + evalOverhead; // everyone is hom-var + maxEvals += AC * maxEvalsPerSamplePerAC + 10; + } + + return new Pair(minEvals, maxEvals); + } + + @Test(dataProvider = "ScalingTests") + private void testScaling(final AFCalcTestBuilder testBuilder, final List ACs, final int nonTypePL) { + final AFCalc calc = testBuilder.makeModel(); + final double[] priors = testBuilder.makePriors(); + final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL); + final AFCalcResult result = calc.getLog10PNonRef(vc, priors); + final Pair expectedNEvaluation = estNumberOfEvaluations(testBuilder, vc, nonTypePL); + final int minEvals = expectedNEvaluation.getFirst(); + final int maxEvals = expectedNEvaluation.getSecond(); + + logger.warn(" min " + 
minEvals + " obs " + result.getnEvaluations() + " max " + maxEvals + " for test " + testBuilder + " sum(ACs)=" + (int)MathUtils.sum(ACs)); + + Assert.assertTrue(result.getnEvaluations() >= minEvals, + "Actual number of evaluations " + result.getnEvaluations() + " < min number of evals " + minEvals); + Assert.assertTrue(result.getnEvaluations() <= maxEvals, + "Actual number of evaluations " + result.getnEvaluations() + " > max number of evals " + minEvals); + } +} \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index be8fd2fb2..a8ea4b7da 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -21,7 +21,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "8c52c0955099cca3215a0d78fd455894"); + HCTest(CEUTRIO_BAM, "", "75013fa6a884104f0b1797502b636698"); } @Test @@ -31,7 +31,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "--max_alternate_alleles_for_indels 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "61c1a0fb62d909229af6b5a91dad8b35"); + HCTest(CEUTRIO_BAM, "--max_alternate_alleles_for_indels 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "3cd3363976b1937d801f9f82996f4abe"); } private void HCTestComplexVariants(String bam, String args, String md5) { diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java 
b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java index c0823e5c5..6c8fb1f4d 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java @@ -46,7 +46,7 @@ public class ArgumentMatch implements Iterable { /** * Maps indices of command line arguments to values paired with that argument. */ - public final SortedMap> sites = new TreeMap>(); + public final SortedMap> sites = new TreeMap>(); /** * An ordered, freeform collection of tags. @@ -90,11 +90,11 @@ public class ArgumentMatch implements Iterable { * @param value Value for the argument at this position. * @param tags ordered freeform text tags associated with this argument. */ - private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final String value, final Tags tags) { + private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final ArgumentMatchValue value, final Tags tags) { this.label = label; this.definition = definition; - ArrayList values = new ArrayList(); + ArrayList values = new ArrayList(); if( value != null ) values.add(value); sites.put(site,values ); @@ -131,11 +131,11 @@ public class ArgumentMatch implements Iterable { */ @SuppressWarnings("unchecked") ArgumentMatch transform(Multiplexer multiplexer, Object key) { - SortedMap> newIndices = new TreeMap>(); - for(Map.Entry> site: sites.entrySet()) { - List newEntries = new ArrayList(); - for(String entry: site.getValue()) - newEntries.add(multiplexer.transformArgument(key,entry)); + SortedMap> newIndices = new TreeMap>(); + for(Map.Entry> site: sites.entrySet()) { + List newEntries = new ArrayList(); + for(ArgumentMatchValue entry: site.getValue()) + newEntries.add(new ArgumentMatchStringValue(multiplexer.transformArgument(key,entry.asString()))); newIndices.put(site.getKey(),newEntries); } ArgumentMatch newArgumentMatch 
= new ArgumentMatch(label,definition); @@ -165,7 +165,7 @@ public class ArgumentMatch implements Iterable { /** * Iterate over each available token. */ - private Iterator tokenIterator = null; + private Iterator tokenIterator = null; /** * The next site to return. Null if none remain. @@ -175,7 +175,7 @@ public class ArgumentMatch implements Iterable { /** * The next token to return. Null if none remain. */ - String nextToken = null; + ArgumentMatchValue nextToken = null; { siteIterator = sites.keySet().iterator(); @@ -254,9 +254,9 @@ public class ArgumentMatch implements Iterable { * @param site site of the command-line argument to which this value is mated. * @param value Text representation of value to add. */ - public void addValue( ArgumentMatchSite site, String value ) { + public void addValue( ArgumentMatchSite site, ArgumentMatchValue value ) { if( !sites.containsKey(site) || sites.get(site) == null ) - sites.put(site, new ArrayList() ); + sites.put(site, new ArrayList() ); sites.get(site).add(value); } @@ -275,8 +275,8 @@ public class ArgumentMatch implements Iterable { * Return the values associated with this argument match. * @return A collection of the string representation of these value. */ - public List values() { - List values = new ArrayList(); + public List values() { + List values = new ArrayList(); for( ArgumentMatchSite site: sites.keySet() ) { if( sites.get(site) != null ) values.addAll(sites.get(site)); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchFileValue.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchFileValue.java new file mode 100644 index 000000000..344b6829a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchFileValue.java @@ -0,0 +1,27 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; + +/** + * Holds a reference to a file as an argument match value. 
+ * + * This is useful when the type of the stored file may be a subclass of java.io.File, + * for example a Queue RemoteFile. + */ +public class ArgumentMatchFileValue extends ArgumentMatchValue { + private final File file; + + public ArgumentMatchFileValue(File file) { + this.file = file; + } + + @Override + public String asString() { + return file == null ? null : file.getAbsolutePath(); + } + + @Override + public File asFile() { + return file; + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java index ed2700006..9dfb3afbe 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java @@ -24,38 +24,36 @@ package org.broadinstitute.sting.commandline; -import java.io.File; - /** - * Where an argument match originated, via the commandline or a file. + * Where an argument match originated, via the commandline or a custom provider. */ public class ArgumentMatchSource implements Comparable { public static final ArgumentMatchSource COMMAND_LINE = new ArgumentMatchSource(ArgumentMatchSourceType.CommandLine, null); private final ArgumentMatchSourceType type; - private final File file; + private final String description; /** * Creates an argument match source from the specified file. - * @param file File specifying the arguments. Must not be null. + * @param description Where the arguments originated. 
*/ - public ArgumentMatchSource(File file) { - this(ArgumentMatchSourceType.File, file); + public ArgumentMatchSource(String description) { + this(ArgumentMatchSourceType.Provider, description); } - private ArgumentMatchSource(ArgumentMatchSourceType type, File file) { - if (type == ArgumentMatchSourceType.File && file == null) - throw new IllegalArgumentException("An argument match source of type File cannot have a null file."); + private ArgumentMatchSource(ArgumentMatchSourceType type, String description) { + if (type == ArgumentMatchSourceType.Provider && description == null) + throw new IllegalArgumentException("An argument match source provider cannot have a null description."); this.type = type; - this.file = file; + this.description = description; } public ArgumentMatchSourceType getType() { return type; } - public File getFile() { - return file; + public String getDescription() { + return description; } @Override @@ -65,13 +63,13 @@ public class ArgumentMatchSource implements Comparable { ArgumentMatchSource that = (ArgumentMatchSource) o; - return (type == that.type) && (file == null ? that.file == null : file.equals(that.file)); + return (type == that.type) && (description == null ? that.description == null : description.equals(that.description)); } @Override public int hashCode() { int result = type != null ? type.hashCode() : 0; - result = 31 * result + (file != null ? file.hashCode() : 0); + result = 31 * result + (description != null ? description.hashCode() : 0); return result; } @@ -84,15 +82,15 @@ public class ArgumentMatchSource implements Comparable { if (comp != 0) return comp; - File f1 = this.file; - File f2 = that.file; + String d1 = this.description; + String d2 = that.description; - if ((f1 == null) ^ (f2 == null)) { - // If one of the files is null and the other is not - // put the null file first - return f1 == null ? 
-1 : 1; + if ((d1 == null) ^ (d2 == null)) { + // If one of the descriptions is null and the other is not + // put the null description first + return d1 == null ? -1 : 1; } - return f1 == null ? 0 : f1.compareTo(f2); + return d1 == null ? 0 : d1.compareTo(d2); } } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java index 3ff6e21d4..118316473 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java @@ -25,8 +25,8 @@ package org.broadinstitute.sting.commandline; /** - * Type of where an argument match originated, via the commandline or a file. + * Type of where an argument match originated, via the commandline or some other provider. */ public enum ArgumentMatchSourceType { - CommandLine, File + CommandLine, Provider } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchStringValue.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchStringValue.java new file mode 100644 index 000000000..bb2015c3b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchStringValue.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; + +/** + * Argument values that originated from a string. + */ +public class ArgumentMatchStringValue extends ArgumentMatchValue { + private final String value; + + public ArgumentMatchStringValue(String value) { + this.value = value; + } + + @Override + public String asString() { + return value; + } + + @Override + public File asFile() { + return value == null ? 
null : new File(value); + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchValue.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchValue.java new file mode 100644 index 000000000..bed4edfa6 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchValue.java @@ -0,0 +1,18 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; + +/** + * Returns argument values as either strings or files. + */ +public abstract class ArgumentMatchValue { + /** + * @return the value of this argument as a String object. + */ + public abstract String asString(); + + /** + * @return the value of this argument as a File object. + */ + public abstract File asFile(); +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index dd4a151bf..54ade61f6 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -215,8 +215,8 @@ public abstract class ArgumentTypeDescriptor { * @param matches The matches for the given argument. * @return The value of the argument if available, or null if not present. */ - protected String getArgumentValue( ArgumentDefinition definition, ArgumentMatches matches ) { - Collection argumentValues = getArgumentValues( definition, matches ); + protected ArgumentMatchValue getArgumentValue( ArgumentDefinition definition, ArgumentMatches matches ) { + Collection argumentValues = getArgumentValues( definition, matches ); if( argumentValues.size() > 1 ) throw new UserException.CommandLineException("Multiple values associated with given definition, but this argument expects only one: " + definition.fullName); return argumentValues.size() > 0 ? 
argumentValues.iterator().next() : null; @@ -244,8 +244,8 @@ public abstract class ArgumentTypeDescriptor { * @param matches The matches for the given argument. * @return The value of the argument if available, or an empty collection if not present. */ - protected Collection getArgumentValues( ArgumentDefinition definition, ArgumentMatches matches ) { - Collection values = new ArrayList(); + protected Collection getArgumentValues( ArgumentDefinition definition, ArgumentMatches matches ) { + Collection values = new ArrayList(); for( ArgumentMatch match: matches ) { if( match.definition.equals(definition) ) values.addAll(match.values()); @@ -310,7 +310,7 @@ public abstract class ArgumentTypeDescriptor { */ protected Object parseBinding(ArgumentSource source, Type type, ArgumentMatches matches, Tags tags) { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); - String value = getArgumentValue(defaultDefinition, matches); + ArgumentMatchValue value = getArgumentValue(defaultDefinition, matches); @SuppressWarnings("unchecked") Class parameterType = JVMUtils.getParameterizedTypeClass(type); String name = defaultDefinition.fullName; @@ -328,7 +328,7 @@ public abstract class ArgumentTypeDescriptor { * @param fieldName The name of the field that was parsed. Used for error reporting. * @return The newly created binding object of type bindingClass. */ - public static Object parseBinding(String value, Class parameterType, Type bindingClass, + public static Object parseBinding(ArgumentMatchValue value, Class parameterType, Type bindingClass, String bindingName, Tags tags, String fieldName) { try { String tribbleType = null; @@ -337,7 +337,7 @@ public abstract class ArgumentTypeDescriptor { throw new UserException.CommandLineException( String.format("Unexpected number of positional tags for argument %s : %s. 
" + "Rod bindings only support -X:type and -X:name,type argument styles", - value, fieldName)); + value.asString(), fieldName)); } else if ( tags.getPositionalTags().size() == 2 ) { // -X:name,type style bindingName = tags.getPositionalTags().get(0); @@ -366,7 +366,7 @@ public abstract class ArgumentTypeDescriptor { if ( tribbleType == null ) { // try to determine the file type dynamically - File file = new File(value); + File file = value.asFile(); if ( file.canRead() && file.isFile() ) { FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file); if ( featureDescriptor != null ) { @@ -379,7 +379,7 @@ public abstract class ArgumentTypeDescriptor { // IntervalBinding can be created from a normal String Class rawType = (makeRawTypeIfNecessary(bindingClass)); try { - return rawType.getConstructor(String.class).newInstance(value); + return rawType.getConstructor(String.class).newInstance(value.asString()); } catch (NoSuchMethodException e) { /* ignore */ } @@ -399,14 +399,14 @@ public abstract class ArgumentTypeDescriptor { } Constructor ctor = (makeRawTypeIfNecessary(bindingClass)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); - return ctor.newInstance(parameterType, bindingName, value, tribbleType, tags); + return ctor.newInstance(parameterType, bindingName, value.asString(), tribbleType, tags); } catch (Exception e) { if ( e instanceof UserException ) throw ((UserException)e); else throw new UserException.CommandLineException( String.format("Failed to parse value %s for argument %s. 
Message: %s", - value, fieldName, e.getMessage())); + value.asString(), fieldName, e.getMessage())); } } } @@ -517,7 +517,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { return true; ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); - String value = getArgumentValue( defaultDefinition, matches ); + ArgumentMatchValue value = getArgumentValue(defaultDefinition, matches); Object result; Tags tags = getArgumentTags(matches); @@ -527,12 +527,12 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { Method valueOf = primitiveToWrapperMap.get(type).getMethod("valueOf",String.class); if(value == null) throw new MissingArgumentValueException(createDefaultArgumentDefinition(source)); - result = valueOf.invoke(null,value.trim()); + result = valueOf.invoke(null,value.asString().trim()); } else if (type.isEnum()) { Object[] vals = type.getEnumConstants(); Object defaultEnumeration = null; // as we look at options, record the default option if it exists for (Object val : vals) { - if (String.valueOf(val).equalsIgnoreCase(value)) return val; + if (String.valueOf(val).equalsIgnoreCase(value == null ? null : value.asString())) return val; try { if (type.getField(val.toString()).isAnnotationPresent(EnumerationArgumentDefault.class)) defaultEnumeration = val; } catch (NoSuchFieldException e) { throw new ReviewedStingException("parsing " + type.toString() + "doesn't contain the field " + val.toString()); } } @@ -544,10 +544,12 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { else if (value == null) throw new MissingArgumentValueException(createDefaultArgumentDefinition(source)); else - throw new UnknownEnumeratedValueException(createDefaultArgumentDefinition(source),value); + throw new UnknownEnumeratedValueException(createDefaultArgumentDefinition(source),value.asString()); + } else if (type.equals(File.class)) { + result = value == null ? 
null : value.asFile(); } else { Constructor ctor = type.getConstructor(String.class); - result = ctor.newInstance(value); + result = ctor.newInstance(value == null ? null : value.asString()); } } catch (UserException e) { throw e; diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index 15ec9dfe5..d77ae67cf 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -174,7 +174,7 @@ public abstract class CommandLineProgram { ParsingEngine parser = clp.parser = new ParsingEngine(clp); parser.addArgumentSource(clp.getClass()); - Map> parsedArgs; + Map parsedArgs; // process the args if (clp.canAddArgumentsDynamically()) { diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsedArgs.java b/public/java/src/org/broadinstitute/sting/commandline/ParsedArgs.java new file mode 100644 index 000000000..9ab315175 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsedArgs.java @@ -0,0 +1,13 @@ +package org.broadinstitute.sting.commandline; + +/** + * Represents a collection of parsed arguments for an argument source. + * + * Useful for printing out help documents. + */ +public abstract class ParsedArgs { + /** + * @return A compact description of the arguments from a provider/source. 
+ */ + public abstract String getDescription(); +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsedListArgs.java b/public/java/src/org/broadinstitute/sting/commandline/ParsedListArgs.java new file mode 100644 index 000000000..a77e73bcf --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsedListArgs.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.commandline; + +import org.apache.commons.lang.StringUtils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * A list of string arguments, usually from the command line or an args list file. + */ +public class ParsedListArgs extends ParsedArgs { + private final List args = new ArrayList(); + + public ParsedListArgs() { + } + + public ParsedListArgs(List args) { + this.args.addAll(args); + } + + public void add(String... args) { + this.args.addAll(Arrays.asList(args)); + } + + @Override + public String getDescription() { + return StringUtils.join(this.args, " "); + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 0fac195e1..a8b729be4 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -30,6 +30,7 @@ import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -61,7 +62,7 @@ public class ParsingEngine { * Indicates as best as possible where command-line text remains unmatched * to existing arguments. 
*/ - ArgumentMatches argumentMatches = null; + private ArgumentMatches argumentMatches = null; /** * Techniques for parsing and for argument lookup. @@ -88,7 +89,10 @@ public class ParsingEngine { /** * List of tags associated with the given instantiation of the command-line argument. */ - private final Map tags = new IdentityHashMap(); + private final Map tags = new IdentityHashMap(); + + private PluginManager argumentProviderPluginManager = + new PluginManager(ParsingEngineArgumentProvider.class); /** * our log, which we want to capture anything from org.broadinstitute.sting @@ -105,7 +109,10 @@ public class ParsingEngine { argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors()); argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS); - addArgumentSource(ParsingEngineArgumentFiles.class); + List> providers = argumentProviderPluginManager.getPlugins(); + for (Class provider: providers) { + addArgumentSource(provider); + } } /** @@ -117,6 +124,10 @@ public class ParsingEngine { addArgumentSource(null, source); } + public ArgumentMatches getArgumentMatches() { + return argumentMatches; + } + /** * Add an argument source. Argument sources are expected to have * any number of fields with an @Argument annotation attached. @@ -156,29 +167,30 @@ public class ParsingEngine { * @param tokens Tokens passed on the command line. * @return The parsed arguments by file. */ - public SortedMap> parse( String[] tokens ) { + public SortedMap parse( String[] tokens ) { argumentMatches = new ArgumentMatches(); - SortedMap> parsedArgs = new TreeMap>(); + SortedMap parsedArgs = new TreeMap(); List cmdLineTokens = Arrays.asList(tokens); parse(ArgumentMatchSource.COMMAND_LINE, cmdLineTokens, argumentMatches, parsedArgs); - ParsingEngineArgumentFiles argumentFiles = new ParsingEngineArgumentFiles(); + List providers = argumentProviderPluginManager.createAllTypes(); - // Load the arguments ONLY into the argument files. 
- // Validation may optionally run on the rest of the arguments. - loadArgumentsIntoObject(argumentFiles); + for (ParsingEngineArgumentProvider provider: providers) { + // Load the arguments ONLY into the provider. + // Validation may optionally run on the rest of the arguments. + loadArgumentsIntoObject(provider); + } - for (File file: argumentFiles.files) { - List fileTokens = getArguments(file); - parse(new ArgumentMatchSource(file), fileTokens, argumentMatches, parsedArgs); + for (ParsingEngineArgumentProvider provider: providers) { + provider.parse(this, parsedArgs); } return parsedArgs; } - private void parse(ArgumentMatchSource matchSource, List tokens, - ArgumentMatches argumentMatches, SortedMap> parsedArgs) { + public void parse(ArgumentMatchSource matchSource, List tokens, + ArgumentMatches argumentMatches, SortedMap parsedArgs) { ArgumentMatchSite lastArgumentMatchSite = new ArgumentMatchSite(matchSource, -1); int i = 0; @@ -195,19 +207,44 @@ public class ParsingEngine { } else { if( argumentMatches.hasMatch(lastArgumentMatchSite) && - !argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite)) - argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token ); + !argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite)) + argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, new ArgumentMatchStringValue(token) ); else - argumentMatches.MissingArgument.addValue( site, token ); + argumentMatches.MissingArgument.addValue( site, new ArgumentMatchStringValue(token) ); } i++; } - parsedArgs.put(matchSource, tokens); + parsedArgs.put(matchSource, new ParsedListArgs(tokens)); } - private List getArguments(File file) { + public void parsePairs(ArgumentMatchSource matchSource, List> tokens, + ArgumentMatches argumentMatches, ParsedArgs matchSourceArgs, + SortedMap parsedArgs) { + int i = 0; + for (Pair pair: tokens) { + + ArgumentMatchSite site = new 
ArgumentMatchSite(matchSource, i); + List matchers = Arrays.asList(ArgumentDefinitions.FullNameDefinitionMatcher, ArgumentDefinitions.ShortNameDefinitionMatcher); + ArgumentDefinition definition = null; + for (DefinitionMatcher matcher: matchers) { + definition = argumentDefinitions.findArgumentDefinition( pair.getFirst(), matcher ); + if (definition != null) + break; + } + if (definition == null) + continue; + ArgumentMatch argumentMatch = new ArgumentMatch(pair.getFirst(), definition, site, new Tags()); + argumentMatches.mergeInto(argumentMatch); + argumentMatch.addValue(site, pair.getSecond()); + i++; + } + + parsedArgs.put(matchSource, matchSourceArgs); + } + + protected List getArguments(File file) { try { if (file.getAbsolutePath().endsWith(".list")) { return getListArguments(file); @@ -283,9 +320,9 @@ public class ParsingEngine { // Ensure that the field contents meet the validation criteria specified by the regular expression. for( ArgumentMatch verifiableMatch: verifiableMatches ) { - for( String value: verifiableMatch.values() ) { - if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) ) - invalidValues.add( new Pair(verifiableArgument, value) ); + for( ArgumentMatchValue value: verifiableMatch.values() ) { + if( verifiableArgument.validation != null && !value.asString().matches(verifiableArgument.validation) ) + invalidValues.add( new Pair(verifiableArgument, value.asString()) ); } } } @@ -629,21 +666,21 @@ class UnmatchedArgumentException extends ArgumentException { private static String formatArguments( ArgumentMatch invalidValues ) { StringBuilder sb = new StringBuilder(); for( ArgumentMatchSite site: invalidValues.sites.keySet() ) - for( String value: invalidValues.sites.get(site) ) { + for( ArgumentMatchValue value: invalidValues.sites.get(site) ) { switch (site.getSource().getType()) { case CommandLine: sb.append( String.format("%nInvalid argument value '%s' at position %d.", - value, site.getIndex()) ); + 
value.asString(), site.getIndex()) ); break; - case File: - sb.append( String.format("%nInvalid argument value '%s' in file %s at position %d.", - value, site.getSource().getFile().getAbsolutePath(), site.getIndex()) ); + case Provider: + sb.append( String.format("%nInvalid argument value '%s' in %s at position %d.", + value.asString(), site.getSource().getDescription(), site.getIndex()) ); break; default: throw new RuntimeException( String.format("Unexpected argument match source type: %s", site.getSource().getType())); } - if(value != null && Utils.dupString(' ',value.length()).equals(value)) + if(value.asString() != null && Utils.dupString(' ',value.asString().length()).equals(value.asString())) sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace."); } return sb.toString(); @@ -696,12 +733,3 @@ class UnknownEnumeratedValueException extends ArgumentException { return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions)); } } - -/** - * Container class to store the list of argument files. - * The files will be parsed after the command line arguments. 
- */ -class ParsingEngineArgumentFiles { - @Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false) - public List files = new ArrayList(); -} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentFiles.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentFiles.java new file mode 100644 index 000000000..3f3921937 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentFiles.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.SortedMap; + +/** + * Container class to store the list of argument files. + * The files will be parsed after the command line arguments. + */ +public class ParsingEngineArgumentFiles extends ParsingEngineArgumentProvider { + @Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false) + public List files = new ArrayList(); + + @Override + public void parse(ParsingEngine parsingEngine, SortedMap parsedArgs) { + ArgumentMatches argumentMatches = parsingEngine.getArgumentMatches(); + for (File file: this.files) { + List fileTokens = parsingEngine.getArguments(file); + parsingEngine.parse(new ArgumentMatchFileSource(file), fileTokens, argumentMatches, parsedArgs); + } + } +} + +class ArgumentMatchFileSource extends ArgumentMatchSource { + ArgumentMatchFileSource(File file) { + super("file " + file.getAbsolutePath()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentProvider.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentProvider.java new file mode 100644 index 000000000..a57f8b08a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentProvider.java @@ -0,0 +1,12 @@ +package 
org.broadinstitute.sting.commandline; + +import java.util.List; +import java.util.SortedMap; + +/** + * A class that can parse arguments for the engine + */ +public abstract class ParsingEngineArgumentProvider { + public abstract void parse(ParsingEngine parsingEngine, SortedMap parsedArgs); +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 07d9df79a..223e11680 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -157,18 +157,22 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { if ( ! (walker instanceof TreeReducible) ) { throw badNT("nt", engine, walker); - } else { - return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation); } + } + + if ( threadAllocation.getNumCPUThreadsPerDataThread() > 1 && ! (walker instanceof NanoSchedulable) ) { + throw badNT("nct", engine, walker); + } + + if ( threadAllocation.getNumDataThreads() > 1 ) { + return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation); } else { - if ( threadAllocation.getNumCPUThreadsPerDataThread() > 1 && ! (walker instanceof NanoSchedulable) ) - throw badNT("nct", engine, walker); return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation); } } private static UserException badNT(final String parallelArg, final GenomeAnalysisEngine engine, final Walker walker) { - throw new UserException.BadArgumentValue("nt", + throw new UserException.BadArgumentValue(parallelArg, String.format("The analysis %s currently does not support parallel execution with %s. 
" + "Please run your analysis without the %s option.", engine.getWalkerName(walker.getClass()), parallelArg, parallelArg)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java index da4eb3955..ac01468eb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java @@ -86,7 +86,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { ArgumentDefinition definition = createDefaultArgumentDefinition(source); - String fileName = getArgumentValue( definition, matches ); + String fileName = getArgumentValue( definition, matches ).asString(); // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java index 83d1b7eb2..f13cb8fa8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java @@ -25,15 +25,11 @@ package org.broadinstitute.sting.gatk.io.stubs; import net.sf.samtools.SAMFileReader; -import org.broadinstitute.sting.commandline.ArgumentMatches; -import org.broadinstitute.sting.commandline.ArgumentSource; -import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; -import org.broadinstitute.sting.commandline.ParsingEngine; +import 
org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import java.io.File; import java.lang.reflect.Type; /** @@ -47,7 +43,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor /** * Create a new SAMFileReader argument, notifying the given engine when that argument has been created. - * @param engine + * @param engine engine */ public SAMFileReaderArgumentTypeDescriptor( GenomeAnalysisEngine engine ) { this.engine = engine; @@ -62,12 +58,12 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { SAMFileReaderBuilder builder = new SAMFileReaderBuilder(); - String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); + ArgumentMatchValue readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); if( readerFileName == null ) throw new UserException.CommandLineException("SAM file compression was supplied, but no associated writer was supplied with it."); - builder.setSAMFile(new File(readerFileName)); + builder.setSAMFile(readerFileName.asFile()); // WARNING: Skipping required side-effect because stub is impossible to generate. 
engine.addInput(source, builder); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 8566f6c63..00c6ddae8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.io.File; import java.io.OutputStream; import java.lang.annotation.Annotation; import java.lang.reflect.Type; @@ -111,10 +110,10 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { // Extract all possible parameters that could be passed to a BAM file writer? ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source); - String writerFileName = getArgumentValue( bamArgumentDefinition, matches ); + ArgumentMatchValue writerFileName = getArgumentValue( bamArgumentDefinition, matches ); - String compressionLevelText = getArgumentValue( createBAMCompressionArgumentDefinition(source), matches ); - Integer compressionLevel = compressionLevelText != null ? Integer.valueOf(compressionLevelText) : null; + ArgumentMatchValue compressionLevelText = getArgumentValue( createBAMCompressionArgumentDefinition(source), matches ); + Integer compressionLevel = compressionLevelText != null ? 
Integer.valueOf(compressionLevelText.asString()) : null; boolean indexOnTheFly = !argumentIsPresent(disableWriteIndexArgumentDefinition(source),matches); boolean generateMD5 = argumentIsPresent(this.enableMD5GenerationArgumentDefinition(source),matches); @@ -124,32 +123,28 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default - if(writerFileName == null) { - if(!source.isRequired()) - throw new MissingArgumentValueException(bamArgumentDefinition); - if(generateMD5) + if(writerFileName != null && writerFileName.asFile() == null && generateMD5) throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside."); - } // Create the stub and set parameters. - SAMFileWriterStub stub; - if ( writerFileName != null ) - stub = new SAMFileWriterStub(engine, new File(writerFileName)); - else - stub = new SAMFileWriterStub(engine, defaultOutputStream); + SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream); - if ( compressionLevel != null ) - stub.setCompressionLevel(compressionLevel); - if ( indexOnTheFly ) - stub.setIndexOnTheFly(indexOnTheFly); - if ( generateMD5 ) - stub.setGenerateMD5(generateMD5); - if ( simplifyBAM ) - stub.setSimplifyBAM(simplifyBAM); + if (writerFileName != null && writerFileName.asFile() != null ) { + stub = new SAMFileWriterStub(engine, writerFileName.asFile()); - // WARNING: Side effects required by engine! 
- parsingEngine.addTags(stub,getArgumentTags(matches)); - engine.addOutput(stub); + if ( compressionLevel != null ) + stub.setCompressionLevel(compressionLevel); + if ( indexOnTheFly ) + stub.setIndexOnTheFly(indexOnTheFly); + if ( generateMD5 ) + stub.setGenerateMD5(generateMD5); + if ( simplifyBAM ) + stub.setSimplifyBAM(simplifyBAM); + + // WARNING: Side effects required by engine! + parsingEngine.addTags(stub,getArgumentTags(matches)); + engine.addOutput(stub); + } return stub; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 5e1132d45..43350ccc1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -138,8 +138,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source); // Get the filename for the genotype file, if it exists. If not, we'll need to send output to out. - String writerFileName = getArgumentValue(defaultArgumentDefinition,matches); - File writerFile = writerFileName != null ? new File(writerFileName) : null; + ArgumentMatchValue writerFileName = getArgumentValue(defaultArgumentDefinition,matches); + File writerFile = writerFileName != null ? writerFileName.asFile() : null; // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default @@ -151,7 +151,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { ? 
new VariantContextWriterStub(engine, writerFile, argumentSources) : new VariantContextWriterStub(engine, defaultOutputStream, argumentSources); - stub.setCompressed(isCompressed(writerFileName)); + stub.setCompressed(isCompressed(writerFileName.asString())); stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches)); stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches)); stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index 64be64afa..9a4de3c36 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -48,7 +48,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno private Map calculateIC(final VariantContext vc) { final GenotypesContext genotypes = (founderIds == null || founderIds.isEmpty()) ? 
vc.getGenotypes() : vc.getGenotypes(founderIds); - if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) + if ( genotypes == null || genotypes.size() < MIN_SAMPLES || !vc.isVariant()) return null; int idxAA = 0, idxAB = 1, idxBB = 2; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerReadAlleleLikelihoodMap.java index 9c0062876..a83adc275 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerReadAlleleLikelihoodMap.java @@ -113,23 +113,20 @@ public class PerReadAlleleLikelihoodMap { return likelihoodReadMap.get(p.getRead()); } - public static Allele getMostLikelyAllele(Map alleleMap) { - double minLike = Double.POSITIVE_INFINITY, maxLike = Double.NEGATIVE_INFINITY; + public static Allele getMostLikelyAllele( final Map alleleMap ) { + double maxLike = Double.NEGATIVE_INFINITY; + double prevMaxLike = Double.NEGATIVE_INFINITY; Allele mostLikelyAllele = Allele.NO_CALL; - for (Map.Entry el : alleleMap.entrySet()) { + for (final Map.Entry el : alleleMap.entrySet()) { if (el.getValue() > maxLike) { + prevMaxLike = maxLike; maxLike = el.getValue(); mostLikelyAllele = el.getKey(); + } else if( el.getValue() > prevMaxLike ) { + prevMaxLike = el.getValue(); } - - if (el.getValue() < minLike) - minLike = el.getValue(); - } - if (maxLike-minLike > INDEL_LIKELIHOOD_THRESH) - return mostLikelyAllele; - else - return Allele.NO_CALL; + return (maxLike - prevMaxLike > INDEL_LIKELIHOOD_THRESH ? 
mostLikelyAllele : Allele.NO_CALL ); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index c737416c5..7cacb2060 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -238,6 +238,19 @@ public class AFCalcResult { return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef; } + /** + * Are any of the alleles polymorphic w.r.t. #isPolymorphic? + * + * @param log10minPNonRef the confidence threshold, in log10 space + * @return true if any are poly, false otherwise + */ + public boolean anyPolymorphic(final double log10minPNonRef) { + for ( final Allele a : getAllelesUsedInGenotyping() ) + if ( a.isNonReference() && isPolymorphic(a, log10minPNonRef) ) + return true; + return false; + } + /** * Returns the log10 probability that allele is segregating * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java index 3794ba240..f13fe4429 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java @@ -1,20 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; -import org.broadinstitute.sting.utils.AutoFormattingTime; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; +import com.google.java.contract.Requires; +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; import 
org.broadinstitute.sting.utils.variantcontext.*; import java.io.*; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedList; -import java.util.List; +import java.util.*; /** * Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format + * + * Once opened, calls can be writen to disk with printCallInfo */ public class ExactCallLogger implements Cloneable { private PrintStream callReport = null; @@ -26,7 +24,7 @@ public class ExactCallLogger implements Cloneable { */ public ExactCallLogger(final File outputFile) { try { - callReport = new PrintStream(new FileOutputStream(outputFile)); + callReport = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFile), 10000000)); callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value"))); } catch (FileNotFoundException e) { throw new UserException.CouldNotCreateOutputFile(outputFile, e); @@ -38,32 +36,28 @@ public class ExactCallLogger implements Cloneable { */ public static class ExactCall { final VariantContext vc; - final long origNanoTime; - long newNanoTime = -1; - final double origPNonRef; - double newPNonRef = -1; + final long runtime; + final AFCalcResult originalCall; - public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) { + public ExactCall(VariantContext vc, final long runtime, final AFCalcResult originalCall) { this.vc = vc; - this.origNanoTime = origNanoTime; - this.origPNonRef = origPNonRef; + this.runtime = runtime; + this.originalCall = originalCall; } @Override public String toString() { - return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s", + return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s", vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(), - origPNonRef, - new AutoFormattingTime(origNanoTime / 1e9).toString(), - newPNonRef, - 
newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString()); + originalCall.getLog10PosteriorOfAFGT0(), + new AutoFormattingTime(runtime / 1e9).toString()); } } - protected void printCallInfo(final VariantContext vc, - final double[] log10AlleleFrequencyPriors, - final long runtimeNano, - final AFCalcResult result) { + protected final void printCallInfo(final VariantContext vc, + final double[] log10AlleleFrequencyPriors, + final long runtimeNano, + final AFCalcResult result) { printCallElement(vc, "type", "ignore", vc.getType()); int allelei = 0; @@ -90,6 +84,7 @@ public class ExactCallLogger implements Cloneable { callReport.flush(); } + @Requires({"vc != null", "variable != null", "key != null", "value != null", "callReport != null"}) private void printCallElement(final VariantContext vc, final Object variable, final Object key, @@ -102,10 +97,10 @@ public class ExactCallLogger implements Cloneable { * Read in a list of ExactCall objects from reader, keeping only those * with starts in startsToKeep or all sites (if this is empty) * - * @param reader - * @param startsToKeep - * @param parser - * @return + * @param reader a just-opened reader sitting at the start of the file + * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should be kept + * @param parser a genome loc parser to create genome locs + * @return a list of ExactCall objects in reader * @throws IOException */ public static List readExactLog(final BufferedReader reader, final List startsToKeep, GenomeLocParser parser) throws IOException { @@ -118,10 +113,17 @@ public class ExactCallLogger implements Cloneable { // skip the header line reader.readLine(); + // skip the first "type" line + reader.readLine(); + while (true) { final VariantContextBuilder builder = new VariantContextBuilder(); final List alleles = new ArrayList(); final List genotypes = new ArrayList(); + final double[] posteriors = new double[2]; + final double[] priors = 
MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true); + final List mle = new ArrayList(); + final Map log10pNonRefByAllele = new HashMap(); long runtimeNano = -1; GenomeLoc currentLoc = null; @@ -139,13 +141,15 @@ public class ExactCallLogger implements Cloneable { if (currentLoc == null) currentLoc = lineLoc; - if (variable.equals("log10PosteriorOfAFzero")) { + if (variable.equals("type")) { if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) { builder.alleles(alleles); final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); builder.genotypes(genotypes); - calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value))); + final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{})); + final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele); + calls.add(new ExactCall(builder.make(), runtimeNano, result)); } break; } else if (variable.equals("allele")) { @@ -155,6 +159,15 @@ public class ExactCallLogger implements Cloneable { final GenotypeBuilder gb = new GenotypeBuilder(key); gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); genotypes.add(gb.make()); + } else if (variable.equals("log10PosteriorOfAFEq0")) { + posteriors[0] = Double.valueOf(value); + } else if (variable.equals("log10PosteriorOfAFGt0")) { + posteriors[1] = Double.valueOf(value); + } else if (variable.equals("MLE")) { + mle.add(Integer.valueOf(value)); + } else if (variable.equals("pNonRefByAllele")) { + final Allele a = Allele.create(key); + log10pNonRefByAllele.put(a, Double.valueOf(value)); } else if (variable.equals("runtime.nano")) { runtimeNano = Long.valueOf(value); } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 76d8d85c2..998894fbf 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -370,8 +370,6 @@ public class IndelRealigner extends ReadWalker { currentInterval = intervals.hasNext() ? intervals.next() : null; - writerToUse = writer; - if ( N_WAY_OUT != null ) { boolean createIndex = true; @@ -383,9 +381,9 @@ public class IndelRealigner extends ReadWalker { createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS); } } else { - // set up the output writer setupWriter(getToolkit().getSAMFileHeader()); + writerToUse = writer; } manager = new ConstrainedMateFixingManager(writerToUse, getToolkit().getGenomeLocParser(), MAX_ISIZE_FOR_MOVEMENT, MAX_POS_MOVE_ALLOWED, MAX_RECORDS_IN_MEMORY); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 00acf854a..7ebfec49e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -36,7 +36,7 @@ import java.util.*; *
    *
  • In parent/child pairs: If an individual genotype is missing at one site, the other one is phased if it is homozygous. No phasing probability is emitted.
  • *
  • In trios: If the child is missing, parents are treated as separate individuals and phased if homozygous. No phasing probability is emitted.
  • - *
  • In trios: If one of the parents is missing, it is handled like a parent/child pair. Phasing is done unless both the parent and child are heterozygous and a phasing probabilitt is emitted.
  • + *
  • In trios: If one of the parents is missing, it is handled like a parent/child pair. Phasing is done unless both the parent and child are heterozygous and a phasing probability is emitted.
  • *
  • In trios: If two individuals are missing, the remaining individual is phased if it is homozygous. No phasing probability is emitted.
  • *
* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 15c17988c..c7b1d0fc7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -42,11 +42,11 @@ import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import java.io.File; import java.io.FileNotFoundException; @@ -542,9 +542,11 @@ public class SelectVariants extends RodWalker implements TreeR VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS); if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) { - final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(sub)).filters(sub.getFiltersMaybeNull()); - addAnnotations(builder, sub); - sub = builder.make(); + synchronized (UG_engine) { + final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(sub)).filters(sub.getFiltersMaybeNull()); + addAnnotations(builder, sub); + sub = builder.make(); + } } if ( (!EXCLUDE_NON_VARIANTS || sub.isPolymorphicInSamples()) && (!EXCLUDE_FILTERED || !sub.isFiltered()) ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java 
b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java index dd12ce761..49851249c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java @@ -32,7 +32,6 @@ import org.reflections.util.ClasspathHelper; import java.io.File; import java.io.IOException; -import java.lang.annotation.Annotation; import java.lang.reflect.*; import java.net.URL; import java.util.*; @@ -198,7 +197,7 @@ public class JVMUtils { * @return the list of class path urls. */ public static Set getClasspathURLs() { - return ClasspathHelper.getUrlsForManifestsCurrentClasspath(); + return ClasspathHelper.forManifest(); } /** @@ -240,8 +239,8 @@ public class JVMUtils { /** * Returns a comma-separated list of the names of the interfaces implemented by this class * - * @param covClass - * @return + * @param covClass class + * @return names of interfaces */ public static String classInterfaces(final Class covClass) { final List interfaces = new ArrayList(); diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java index 82fb6b8d6..43cc800d8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java @@ -25,8 +25,6 @@ package org.broadinstitute.sting.utils.classloader; -import ch.qos.logback.classic.Level; -import ch.qos.logback.classic.Logger; import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; @@ -35,7 +33,6 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.reflections.Reflections; import org.reflections.scanners.SubTypesScanner; import org.reflections.util.ConfigurationBuilder; -import 
org.slf4j.LoggerFactory; import java.io.File; import java.lang.reflect.Constructor; @@ -57,9 +54,8 @@ public class PluginManager { private static final Reflections defaultReflections; static { - // turn off logging in the reflections library - they talk too much (to the wrong logger factory as well, logback) - Logger logger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Reflections.class); - logger.setLevel(Level.OFF); + // turn off logging in the reflections library - they talk too much + Reflections.log = null; Set classPathUrls = new LinkedHashSet(); @@ -179,9 +175,9 @@ public class PluginManager { /** * Sorts, in place, the list of plugins according to getName() on each element * - * @param unsortedPlugins + * @param unsortedPlugins unsorted plugins */ - private final void sortPlugins(final List> unsortedPlugins) { + private void sortPlugins(final List> unsortedPlugins) { Collections.sort(unsortedPlugins, new ComparePluginsByName()); } @@ -235,7 +231,7 @@ public class PluginManager { * @param plugin Name of the plugin for which to search. * @return True if the plugin exists, false otherwise. 
*/ - public boolean exists(Class plugin) { + public boolean exists(Class plugin) { return pluginsByName.containsValue(plugin); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java index 25ef8ccd2..0f6808718 100755 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java @@ -26,10 +26,7 @@ package org.broadinstitute.sting.utils.help; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.ArgumentDefinition; -import org.broadinstitute.sting.commandline.ArgumentDefinitionGroup; -import org.broadinstitute.sting.commandline.ArgumentDefinitions; -import org.broadinstitute.sting.commandline.ArgumentMatchSource; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; @@ -273,9 +270,9 @@ public class HelpFormatter { * Generate a standard header for the logger * * @param applicationDetails details of the application to run. 
- * @param parsedArgs the command line arguments passed in + * @param parsedArgs the arguments passed in */ - public static void generateHeaderInformation(ApplicationDetails applicationDetails, Map> parsedArgs) { + public static void generateHeaderInformation(ApplicationDetails applicationDetails, Map parsedArgs) { DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); java.util.Date date = new java.util.Date(); @@ -286,19 +283,16 @@ public class HelpFormatter { for (String headerLine : applicationDetails.applicationHeader) logger.info(headerLine); logger.debug("Current directory: " + System.getProperty("user.dir")); - for (Map.Entry> entry: parsedArgs.entrySet()) { + for (Map.Entry entry: parsedArgs.entrySet()) { ArgumentMatchSource matchSource = entry.getKey(); final String sourceName; switch (matchSource.getType()) { case CommandLine: sourceName = "Program"; break; - case File: sourceName = matchSource.getFile().getPath(); break; + case Provider: sourceName = matchSource.getDescription(); break; default: throw new RuntimeException("Unexpected argument match source type: " + matchSource.getType()); } - String output = sourceName + " Args:"; - for (String str : entry.getValue()) { - output = output + " " + str; - } + String output = sourceName + " Args: " + entry.getValue().getDescription(); logger.info(output); } logger.info("Date/Time: " + dateFormat.format(date)); diff --git a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java index 99d6b88f3..b1e788dc5 100644 --- a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java @@ -39,7 +39,7 @@ public class ArgumentMatchSiteUnitTest { @Test public void testFile() { - ArgumentMatchSource source = new ArgumentMatchSource(new File("test")); + ArgumentMatchSource source 
= new ArgumentMatchFileSource(new File("test")); ArgumentMatchSite site = new ArgumentMatchSite(source, 1); Assert.assertEquals(site.getSource(), source); Assert.assertEquals(site.getIndex(), 1); diff --git a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java index 4bc7eb822..a183b2001 100644 --- a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java @@ -35,15 +35,15 @@ public class ArgumentMatchSourceUnitTest extends BaseTest { public void testCommandLine() { ArgumentMatchSource source = ArgumentMatchSource.COMMAND_LINE; Assert.assertEquals(source.getType(), ArgumentMatchSourceType.CommandLine); - Assert.assertNull(source.getFile()); + Assert.assertNull(source.getDescription()); } @Test public void testFile() { File f = new File("test"); - ArgumentMatchSource source = new ArgumentMatchSource(f); - Assert.assertEquals(source.getType(), ArgumentMatchSourceType.File); - Assert.assertEquals(source.getFile(), f); + ArgumentMatchSource source = new ArgumentMatchFileSource(f); + Assert.assertEquals(source.getType(), ArgumentMatchSourceType.Provider); + Assert.assertEquals(source.getDescription(), "file " + f.getAbsolutePath()); } @Test(expectedExceptions = IllegalArgumentException.class) @@ -54,8 +54,8 @@ public class ArgumentMatchSourceUnitTest extends BaseTest { @Test public void testEquals() { ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; - ArgumentMatchSource fileA = new ArgumentMatchSource(new File("a")); - ArgumentMatchSource fileB = new ArgumentMatchSource(new File("b")); + ArgumentMatchSource fileA = new ArgumentMatchFileSource(new File("a")); + ArgumentMatchSource fileB = new ArgumentMatchFileSource(new File("b")); Assert.assertFalse(cmdLine.equals(null)); @@ -75,8 +75,8 @@ public class 
ArgumentMatchSourceUnitTest extends BaseTest { @Test public void testCompareTo() { ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; - ArgumentMatchSource fileA = new ArgumentMatchSource(new File("a")); - ArgumentMatchSource fileB = new ArgumentMatchSource(new File("b")); + ArgumentMatchSource fileA = new ArgumentMatchFileSource(new File("a")); + ArgumentMatchSource fileB = new ArgumentMatchFileSource(new File("b")); Assert.assertTrue(cmdLine.compareTo(cmdLine) == 0); Assert.assertTrue(cmdLine.compareTo(fileA) < 0); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index df088a4ad..72724e46a 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("26af30187316f742878c85f0ed091837")); + Arrays.asList("48b4f4b05461be276bffc91350f08cbc")); executeTest("test Multiple SNP alleles", spec); } @@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - 
Arrays.asList("aa9cf96ab8f5aa844387e3aef1f27249")); + Arrays.asList("04affcc9d720ee17bc221759707e0cd2")); executeTest("test reverse trim", spec); } @@ -84,7 +84,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("d210ee1baa75dd4a0c63aef6b1fa7a8a")); + Arrays.asList("112e7bedfd284d4d9390aa006118c733")); executeTest("test mismatched PLs", spec); } @@ -343,13 +343,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("7fc488fe16dea9f023bfcfdaa908a548")); + Arrays.asList("863ee56b3594f09795644127f2f9539f")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("f3ff7fe0f15f31eadd726c711d6bf3de")); + Arrays.asList("503ca1b75cc7b2679eaa80f7b5e7ef1c")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -452,5 +452,4 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { Arrays.asList("bbf16e1873e525ee5975021cfb8988cf")); executeTest("test calling on a ReducedRead BAM", spec); } - } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 040845828..9b464cfec 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -113,4 +113,14 @@ public class IndelRealignerIntegrationTest extends WalkerTest { executeTest(String.format("realigner [%s]", entry.getKey()), spec); } } + + @Test + public void testNWayOut() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommandPrefix + " -nWayOut .clean.bam ", + 1, + Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")); + executeTest("test realigner nWayOut", spec1); + } + } diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 56f6460fb..165e6a4e9 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -13,6 +13,7 @@ import net.sf.samtools.SAMFileHeader.SortOrder import org.broadinstitute.sting.queue.util.QScriptUtils import org.broadinstitute.sting.queue.function.ListWriterFunction import org.broadinstitute.sting.commandline.Hidden +import org.broadinstitute.sting.commandline class DataProcessingPipeline extends QScript { qscript => @@ -41,34 +42,34 @@ class DataProcessingPipeline extends QScript { @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false) var bwaPath: File = _ - @Input(doc="the project name determines the final output (BAM file) base name. 
Example NA12878 yields NA12878.processed.bam", fullName="project", shortName="p", required=false) + @Argument(doc="the project name determines the final output (BAM file) base name. Example NA12878 yields NA12878.processed.bam", fullName="project", shortName="p", required=false) var projectName: String = "project" - @Input(doc="Output path for the processed BAM files.", fullName="output_directory", shortName="outputDir", required=false) + @Argument(doc="Output path for the processed BAM files.", fullName="output_directory", shortName="outputDir", required=false) var outputDir: String = "" - @Input(doc="the -L interval string to be used by GATK - output bams at interval only", fullName="gatk_interval_string", shortName="L", required=false) + @Argument(doc="the -L interval string to be used by GATK - output bams at interval only", fullName="gatk_interval_string", shortName="L", required=false) var intervalString: String = "" @Input(doc="an intervals file to be used by GATK - output bams at intervals only", fullName="gatk_interval_file", shortName="intervals", required=false) var intervals: File = _ - @Input(doc="Cleaning model: KNOWNS_ONLY, USE_READS or USE_SW", fullName="clean_model", shortName="cm", required=false) + @Argument(doc="Cleaning model: KNOWNS_ONLY, USE_READS or USE_SW", fullName="clean_model", shortName="cm", required=false) var cleaningModel: String = "USE_READS" - @Input(doc="Decompose input BAM file and fully realign it using BWA and assume Single Ended reads", fullName="use_bwa_single_ended", shortName="bwase", required=false) + @Argument(doc="Decompose input BAM file and fully realign it using BWA and assume Single Ended reads", fullName="use_bwa_single_ended", shortName="bwase", required=false) var useBWAse: Boolean = false - @Input(doc="Decompose input BAM file and fully realign it using BWA and assume Pair Ended reads", fullName="use_bwa_pair_ended", shortName="bwape", required=false) + @Argument(doc="Decompose input BAM file and fully realign 
it using BWA and assume Pair Ended reads", fullName="use_bwa_pair_ended", shortName="bwape", required=false) var useBWApe: Boolean = false - @Input(doc="Decompose input BAM file and fully realign it using BWA SW", fullName="use_bwa_sw", shortName="bwasw", required=false) + @Argument(doc="Decompose input BAM file and fully realign it using BWA SW", fullName="use_bwa_sw", shortName="bwasw", required=false) var useBWAsw: Boolean = false - @Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false) + @Argument(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false) var bwaThreads: Int = 1 - @Input(doc="Perform validation on the BAM files", fullName="validation", shortName="vs", required=false) + @Argument(doc="Perform validation on the BAM files", fullName="validation", shortName="vs", required=false) var validation: Boolean = false @@ -76,15 +77,15 @@ class DataProcessingPipeline extends QScript { * Hidden Parameters ****************************************************************************/ @Hidden - @Input(doc="How many ways to scatter/gather", fullName="scatter_gather", shortName="sg", required=false) + @Argument(doc="How many ways to scatter/gather", fullName="scatter_gather", shortName="sg", required=false) var nContigs: Int = -1 @Hidden - @Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false) + @Argument(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false) var defaultPlatform: String = "" @Hidden - @Input(doc="Run the pipeline in test mode only", fullName = "test_mode", shortName = "test", required=false) + @Argument(doc="Run the pipeline in test mode only", fullName = "test_mode", shortName = "test", required=false) var testMode: Boolean = false diff --git 
a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index d0379d022..5b84bfd16 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -28,7 +28,7 @@ import function.QFunction import java.io.File import org.broadinstitute.sting.commandline._ import org.broadinstitute.sting.queue.util._ -import org.broadinstitute.sting.queue.engine.{QGraphSettings, QGraph} +import org.broadinstitute.sting.queue.engine.{QStatusMessenger, QGraphSettings, QGraph} import collection.JavaConversions._ import org.broadinstitute.sting.utils.classloader.PluginManager import org.broadinstitute.sting.utils.exceptions.UserException @@ -90,29 +90,46 @@ class QCommandLine extends CommandLineProgram with Logging { private var qScriptClasses: File = _ private var shuttingDown = false - private lazy val pluginManager = { + private lazy val qScriptPluginManager = { qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory) qScriptManager.loadScripts(scripts, qScriptClasses) new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL)) } - QFunction.parsingEngine = new ParsingEngine(this) + private lazy val qStatusMessengerPluginManager = { + new PluginManager[QStatusMessenger](classOf[QStatusMessenger]) + } + + ClassFieldCache.parsingEngine = new ParsingEngine(this) /** * Takes the QScripts passed in, runs their script() methods, retrieves their generated * functions, and then builds and runs a QGraph based on the dependencies. 
*/ def execute = { + val allStatusMessengers = qStatusMessengerPluginManager.createAllTypes() + if (settings.qSettings.runName == null) settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName) if (IOUtils.isDefaultTempDir(settings.qSettings.tempDirectory)) settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp") qGraph.initializeWithSettings(settings) - val allQScripts = pluginManager.createAllTypes() + for (statusMessenger <- allStatusMessengers) { + loadArgumentsIntoObject(statusMessenger) + } + + for (statusMessenger <- allStatusMessengers) { + statusMessenger.started() + } + + val allQScripts = qScriptPluginManager.createAllTypes() for (script <- allQScripts) { - logger.info("Scripting " + pluginManager.getName(script.getClass.asSubclass(classOf[QScript]))) + logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript]))) loadArgumentsIntoObject(script) + // TODO: Pulling inputs can be time/io expensive! Some scripts are using the files to generate functions-- even for dry runs-- so pull it all down for now. 
+ //if (settings.run) + script.pullInputs() script.qSettings = settings.qSettings try { script.script() @@ -142,11 +159,18 @@ class QCommandLine extends CommandLineProgram with Logging { logger.info("Writing final jobs report...") qGraph.writeJobsReport() - if (!qGraph.success) { + if (!success) { logger.info("Done with errors") qGraph.logFailed() + for (statusMessenger <- allStatusMessengers) + statusMessenger.exit("Done with errors") 1 } else { + if (settings.run) { + allQScripts.foreach(_.pushOutputs()) + for (statusMessenger <- allStatusMessengers) + statusMessenger.done(allQScripts.map(_.remoteOutputs)) + } 0 } } @@ -158,19 +182,30 @@ class QCommandLine extends CommandLineProgram with Logging { override def canAddArgumentsDynamically = true /** - * Returns the list of QScripts passed in via -S so that their - * arguments can be inspected before QScript.script is called. - * @return Array of QScripts passed in. + * Returns the list of QScripts passed in via -S and other plugins + * so that their arguments can be inspected before QScript.script is called. 
+ * @return Array of dynamic sources */ - override def getArgumentSources = - pluginManager.getPlugins.toIterable.toArray.asInstanceOf[Array[Class[_]]] + override def getArgumentSources = { + var plugins = Seq.empty[Class[_]] + plugins ++= qScriptPluginManager.getPlugins + plugins ++= qStatusMessengerPluginManager.getPlugins + plugins.toArray + } /** - * Returns the name of a QScript - * @return The name of a QScript + * Returns the name of a script/plugin + * @return The name of a script/plugin */ - override def getArgumentSourceName(source: Class[_]) = - pluginManager.getName(source.asSubclass(classOf[QScript])) + override def getArgumentSourceName(source: Class[_]) = { + if (classOf[QScript].isAssignableFrom(source)) + qScriptPluginManager.getName(source.asSubclass(classOf[QScript])) + else if (classOf[QStatusMessenger].isAssignableFrom(source)) + qStatusMessengerPluginManager.getName(source.asSubclass(classOf[QStatusMessenger])) + else + null + + } /** * Returns a ScalaCompoundArgumentTypeDescriptor that can parse argument sources into scala collections. diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala index 6f887ea00..2dcfb916c 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -27,7 +27,8 @@ package org.broadinstitute.sting.queue import engine.JobRunInfo import org.broadinstitute.sting.queue.function.QFunction import annotation.target.field -import util.{StringFileConversions, PrimitiveOptionConversions, Logging} +import util._ +import org.broadinstitute.sting.commandline.ArgumentSource /** * Defines a Queue pipeline as a collection of CommandLineFunctions. 
@@ -106,6 +107,37 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon def addAll(functions: Seq[QFunction]) { functions.foreach( f => add(f) ) } + + def pullInputs() { + val inputs = ClassFieldCache.getFieldFiles(this, inputFields) + for (remoteFile <- filterRemoteFiles(inputs)) { + logger.info("Pulling %s from %s".format(remoteFile.getAbsolutePath, remoteFile.remoteDescription)) + remoteFile.pullToLocal() + } + } + + def pushOutputs() { + val outputs = ClassFieldCache.getFieldFiles(this, outputFields) + for (remoteFile <- filterRemoteFiles(outputs)) { + logger.info("Pushing %s to %s".format(remoteFile.getAbsolutePath, remoteFile.remoteDescription)) + remoteFile.pushToRemote() + } + } + + def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] = + outputFields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap + + private def filterRemoteFiles(fields: Seq[File]): Seq[RemoteFile] = + fields.filter(field => field != null && field.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]) + + /** The complete list of fields. */ + def functionFields: Seq[ArgumentSource] = ClassFieldCache.classFunctionFields(this.getClass) + /** The @Input fields. */ + def inputFields: Seq[ArgumentSource] = ClassFieldCache.classInputFields(this.getClass) + /** The @Output fields. */ + def outputFields: Seq[ArgumentSource] = ClassFieldCache.classOutputFields(this.getClass) + /** The @Argument fields. 
*/ + def argumentFields: Seq[ArgumentSource] = ClassFieldCache.classArgumentFields(this.getClass) } object QScript { diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala b/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala index 74487917f..2528c0572 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala @@ -11,6 +11,7 @@ import org.apache.log4j.Level import scala.tools.nsc.util.{FakePos, NoPosition, Position} import org.broadinstitute.sting.queue.util.TextFormatUtils._ import org.broadinstitute.sting.utils.classloader.JVMUtils +import tools.util.StringOps /** * Plugin manager for QScripts which loads QScripts into the current class loader. @@ -63,7 +64,7 @@ object QScriptManager extends Logging { * Heavily based on scala/src/compiler/scala/tools/nsc/reporters/ConsoleReporter.scala */ private class Log4JReporter(val settings: Settings) extends AbstractReporter { - def displayPrompt { throw new UnsupportedOperationException("Unable to prompt the user. Prompting should be off.") } + def displayPrompt() { throw new UnsupportedOperationException("Unable to prompt the user. Prompting should be off.") } /** * Displays the message at position with severity. 
@@ -98,9 +99,9 @@ object QScriptManager extends Logging { */ def printSummary() { if (WARNING.count > 0) - printMessage(Level.WARN, countElementsAsString(WARNING.count, "warning") + " found") + printMessage(Level.WARN, StringOps.countElementsAsString(WARNING.count, "warning") + " found") if (ERROR.count > 0) - printMessage(Level.ERROR, countElementsAsString(ERROR.count, "error") + " found") + printMessage(Level.ERROR, StringOps.countElementsAsString(ERROR.count, "error") + " found") } /** diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala index 2d4ff60f5..62c016812 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala @@ -185,7 +185,7 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod val tailLines = IOUtils.tail(errorFile, maxLines) val nl = "%n".format() val summary = if (tailLines.size > maxLines) "Last %d lines".format(maxLines) else "Contents" - this.function.jobErrorLines = collection.JavaConversions.asScalaIterable(tailLines).toSeq + this.function.jobErrorLines = collection.JavaConversions.collectionAsScalaIterable(tailLines).toSeq logger.error("%s of %s:%n%s".format(summary, errorFile, StringUtils.join(tailLines, nl))) } else { logger.error("Unable to access log file: %s".format(errorFile)) diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala new file mode 100644 index 000000000..eeabe6d1d --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala @@ -0,0 +1,13 @@ +package org.broadinstitute.sting.queue.engine + +import org.broadinstitute.sting.commandline.ArgumentSource +import org.broadinstitute.sting.queue.util.RemoteFile + +/** + * Plugin that
sends QStatus messages + */ +trait QStatusMessenger { + def started() + def done(files: Seq[Map[ArgumentSource, Seq[RemoteFile]]]) + def exit(message: String) +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala new file mode 100644 index 000000000..1193e7dec --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -0,0 +1,407 @@ +package org.broadinstitute.sting.queue.extensions.cancer + +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.commandline.Gather +import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction +import org.broadinstitute.sting.queue.extensions.gatk.{TaggedFile, VcfGatherFunction, LocusScatterFunction} + +class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { + analysisName = "MuTect" + analysis_type = "MuTect" + scatterClass = classOf[LocusScatterFunction] + + /** used for debugging, basically exit as soon as we get the reads */ + @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") + var noop: Boolean = _ + + /** add many additional columns of statistics to the output file */ + @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, exclusiveOf="", validation="") + var enable_extended_output: Boolean = _ + + /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ + @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect 
artifacts", required=false, exclusiveOf="", validation="") + var artifact_detection_mode: Boolean = _ + + /** name to use for tumor in output files */ + @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") + var tumor_sample_name: String = _ + + /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ + @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") + var bam_tumor_sample_name: String = _ + + /** name to use for normal in output files */ + @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") + var normal_sample_name: String = _ + + /** force output for each site */ + @Argument(fullName="force_output", shortName="", doc="force output for each site", required=false, exclusiveOf="", validation="") + var force_output: Boolean = _ + + /** force output for all alleles at each site */ + @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") + var force_alleles: Boolean = _ + + /** only emit passing calls */ + @Argument(fullName="only_passing_calls", shortName="", doc="only emit passing calls", required=false, exclusiveOf="", validation="") + var only_passing_calls: Boolean = _ + + /** Initial LOD threshold for calling tumor variant */ + @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var initial_tumor_lod: Option[Float] = None + + /** Format string for initial_tumor_lod */ + @Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, 
exclusiveOf="", validation="") + var initial_tumor_lodFormat: String = "%s" + + /** LOD threshold for calling tumor variant */ + @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var tumor_lod: Option[Float] = None + + /** Format string for tumor_lod */ + @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") + var tumor_lodFormat: String = "%s" + + /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ + @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") + var fraction_contamination: Option[Float] = None + + /** Format string for fraction_contamination */ + @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") + var fraction_contaminationFormat: String = "%s" + + /** minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination */ + @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fraction: Option[Float] = None + + /** Format string for minimum_mutation_cell_fraction */ + @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fractionFormat: String = "%s" + + /** LOD threshold for calling normal non-germline */ + @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", 
required=false, exclusiveOf="", validation="") + var normal_lod: Option[Float] = None + + /** Format string for normal_lod */ + @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") + var normal_lodFormat: String = "%s" + + /** LOD threshold for calling normal non-variant */ + @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") + var normal_artifact_lod: Option[Float] = None + + /** Format string for normal_artifact_lod */ + @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") + var normal_artifact_lodFormat: String = "%s" + + /** LOD threshold for calling strand bias */ + @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_lod: Option[Float] = None + + /** Format string for strand_artifact_lod */ + @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for strand_artifact_lod", required=false, exclusiveOf="", validation="") + var strand_artifact_lodFormat: String = "%s" + + /** power threshold for calling strand bias */ + @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_power_threshold: Option[Float] = None + + /** Format string for strand_artifact_power_threshold */ + @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") + var strand_artifact_power_thresholdFormat: String = "%s" + + /** LOD threshold for calling normal non-variant at dbsnp sites */ + 
@Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lod: Option[Float] = None + + /** Format string for dbsnp_normal_lod */ + @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lodFormat: String = "%s" + + /** Power threshold for normal to determine germline vs variant */ + @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_threshold: Option[Float] = None + + /** Format string for somatic_classification_normal_power_threshold */ + @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_thresholdFormat: String = "%s" + + /** minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor */ + @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fraction: Option[Float] = None + + /** Format string for minimum_normal_allele_fraction */ + @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fractionFormat: String = "%s" + + /** for computational efficiency, reject sites with allelic fraction below this threshold */ + @Argument(fullName="tumor_f_pretest", shortName="", 
doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") + var tumor_f_pretest: Option[Float] = None + + /** Format string for tumor_f_pretest */ + @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") + var tumor_f_pretestFormat: String = "%s" + + /** threshold for minimum base quality score */ + @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") + var min_qscore: Option[Int] = None + + /** how many gapped events (ins/del) are allowed in proximity to this candidate */ + @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") + var gap_events_threshold: Option[Int] = None + + /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ + @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fraction: Option[Float] = None + + /** Format string for heavily_clipped_read_fraction */ + @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fractionFormat: String = "%s" + + /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ + @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads", required=false, exclusiveOf="", validation="") + var 
clipping_bias_pvalue_threshold: Option[Float] = None + + /** Format string for clipping_bias_pvalue_threshold */ + @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_thresholdFormat: String = "%s" + + /** threshold for determining if there is relatedness between the alt and ref allele read piles */ + @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") + var fraction_mapq0_threshold: Option[Float] = None + + /** Format string for fraction_mapq0_threshold */ + @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") + var fraction_mapq0_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact median */ + @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") + var pir_median_threshold: Option[Double] = None + + /** Format string for pir_median_threshold */ + @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") + var pir_median_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact MAD */ + @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") + var pir_mad_threshold: Option[Double] = None + + /** Format string for pir_mad_threshold */ + @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format string for pir_mad_threshold", required=false, exclusiveOf="", validation="") + var 
pir_mad_thresholdFormat: String = "%s" + + /** required minimum value for tumor alt allele maximum mapping quality score */ + @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") + var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None + + /** threshold for maximum alternate allele counts in normal */ + @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_count: Option[Int] = None + + /** threshold for maximum alternate allele quality score sum in normal */ + @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None + + /** threshold for maximum alternate allele fraction in normal */ + @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fraction: Option[Double] = None + + /** Format string for max_alt_allele_in_normal_fraction */ + @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fractionFormat: String = "%s" + + /** Phred scale quality score constant to use in power calculations */ + @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_qscore: Option[Int] = None + + /** Absolute 
Copy Number Data, as defined by Absolute, to use in power calculations */ + @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") + var absolute_copy_number_data: File = _ + + /** Allelic fraction constant to use in power calculations */ + @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_af: Option[Double] = None + + /** Format string for power_constant_af */ + @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") + var power_constant_afFormat: String = "%s" + + /** Call-stats output */ + @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** + * Short name of out + * @return Short name of out + */ + def o = this.out + + /** + * Short name of out + * @param value Short name of out + */ + def o_=(value: File) { this.out = value } + + /** VCF output of mutation candidates */ + @Output(fullName="vcf", shortName="vcf", doc="VCF output of mutation candidates", required=false, exclusiveOf="", validation="") + @Gather(classOf[VcfGatherFunction]) + var vcf: File = _ + + /** Automatically generated index for vcf */ + @Output(fullName="vcfIndex", shortName="", doc="Automatically generated index for vcf", required=false, exclusiveOf="", validation="") + @Gather(enabled=false) + private var vcfIndex: File = _ + + /** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. 
*/ + @Argument(fullName="no_cmdline_in_header", shortName="no_cmdline_in_header", doc="Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required=false, exclusiveOf="", validation="") + var no_cmdline_in_header: Boolean = _ + + /** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */ + @Argument(fullName="sites_only", shortName="sites_only", doc="Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required=false, exclusiveOf="", validation="") + var sites_only: Boolean = _ + + /** force BCF output, regardless of the file's extension */ + @Argument(fullName="bcf", shortName="bcf", doc="force BCF output, regardless of the file's extension", required=false, exclusiveOf="", validation="") + var bcf: Boolean = _ + + /** VCF file of DBSNP information */ + @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") + var dbsnp: Seq[File] = Nil + + /** Dependencies on any indexes of dbsnp */ + @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") + private var dbsnpIndexes: Seq[File] = Nil + + /** VCF file of COSMIC sites */ + @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") + var cosmic: Seq[File] = Nil + + /** Dependencies on any indexes of cosmic */ + @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") + private var cosmicIndexes: Seq[File] = Nil + + /** VCF file of sites observed in normal */ + @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") + var normal_panel: Seq[File] = Nil + + /** Dependencies on any 
indexes of normal_panel */ + @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") + private var normal_panelIndexes: Seq[File] = Nil + + /** write out coverage in WIGGLE format to this file */ + @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_file: File = _ + + /** + * Short name of coverage_file + * @return Short name of coverage_file + */ + def cov = this.coverage_file + + /** + * Short name of coverage_file + * @param value Short name of coverage_file + */ + def cov_=(value: File) { this.coverage_file = value } + + /** write out 20x of Q20 coverage in WIGGLE format to this file */ + @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_20_q20_file: File = _ + + /** + * Short name of coverage_20_q20_file + * @return Short name of coverage_20_q20_file + */ + def cov_q20 = this.coverage_20_q20_file + + /** + * Short name of coverage_20_q20_file + * @param value Short name of coverage_20_q20_file + */ + def cov_q20_=(value: File) { this.coverage_20_q20_file = value } + + /** write out power in WIGGLE format to this file */ + @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var power_file: File = _ + + /** + * Short name of power_file + * @return Short name of power_file + */ + def pow = this.power_file + + /** + * Short name 
of power_file + * @param value Short name of power_file + */ + def pow_=(value: File) { this.power_file = value } + + /** write out tumor read depth in WIGGLE format to this file */ + @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var tumor_depth_file: File = _ + + /** + * Short name of tumor_depth_file + * @return Short name of tumor_depth_file + */ + def tdf = this.tumor_depth_file + + /** + * Short name of tumor_depth_file + * @param value Short name of tumor_depth_file + */ + def tdf_=(value: File) { this.tumor_depth_file = value } + + /** write out normal read depth in WIGGLE format to this file */ + @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var normal_depth_file: File = _ + + /** + * Short name of normal_depth_file + * @return Short name of normal_depth_file + */ + def ndf = this.normal_depth_file + + /** + * Short name of normal_depth_file + * @param value Short name of normal_depth_file + */ + def ndf_=(value: File) { this.normal_depth_file = value } + + /** if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. 
*/ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = _ + + /** + * Short name of filter_mismatching_base_and_quals + * @return Short name of filter_mismatching_base_and_quals + */ + def filterMBQ = this.filter_mismatching_base_and_quals + + /** + * Short name of filter_mismatching_base_and_quals + * @param value Short name of filter_mismatching_base_and_quals + */ + def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } + + override def freezeFieldValues() { + super.freezeFieldValues() + if (vcf != null && !org.broadinstitute.sting.utils.io.IOUtils.isSpecialFile(vcf)) + if (!org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor.isCompressed(vcf.getPath)) + vcfIndex = new File(vcf.getPath + ".idx") + dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + } + + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + 
conditional(force_alleles, "--force_alleles", escape=true, format="%s") + conditional(only_passing_calls, "--only_passing_calls", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", 
heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + optional("-vcf", vcf, spaceSeparated=true, escape=true, format="%s") + conditional(no_cmdline_in_header, "-no_cmdline_in_header", escape=true, format="%s") + conditional(sites_only, "-sites_only", escape=true, format="%s") + conditional(bcf, "-bcf", escape=true, format="%s") + repeat("-dbsnp", dbsnp, 
formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala index 9522ec86c..a59f273ad 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala @@ -28,6 +28,7 @@ import org.broadinstitute.sting.queue.function.scattergather.GatherFunction import org.broadinstitute.sting.queue.extensions.picard.PicardBamFunction import org.broadinstitute.sting.queue.function.{RetryMemoryLimit, QFunction} import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor +import org.broadinstitute.sting.queue.util.ClassFieldCache /** * Merges BAM files using net.sf.picard.sam.MergeSamFiles. 
@@ -47,13 +48,13 @@ class BamGatherFunction extends GatherFunction with PicardBamFunction with Retry // bam_compression and index_output_bam_on_the_fly from SAMFileWriterArgumentTypeDescriptor // are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK - val compression = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME) + val compression = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME) this.compressionLevel = originalGATK.getFieldValue(compression).asInstanceOf[Option[Int]] - val disableIndex = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME) + val disableIndex = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME) this.createIndex = Some(!originalGATK.getFieldValue(disableIndex).asInstanceOf[Boolean]) - val enableMD5 = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME) + val enableMD5 = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME) this.createMD5 = Some(originalGATK.getFieldValue(enableMD5).asInstanceOf[Boolean]) super.freezeFieldValues() diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala index e619c0a02..395a34c60 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala @@ -92,6 +92,6 @@ object GATKIntervals { } private def createBinding(interval: String, argumentName: String, tags: Tags): IntervalBinding[Feature] = { - ArgumentTypeDescriptor.parseBinding(interval, classOf[Feature], classOf[IntervalBinding[Feature]], argumentName, 
tags, argumentName).asInstanceOf[IntervalBinding[Feature]] + ArgumentTypeDescriptor.parseBinding(new ArgumentMatchStringValue(interval), classOf[Feature], classOf[IntervalBinding[Feature]], argumentName, tags, argumentName).asInstanceOf[IntervalBinding[Feature]] } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala index 75be4d773..fb22554f0 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala @@ -27,6 +27,7 @@ package org.broadinstitute.sting.queue.extensions.gatk import org.broadinstitute.sting.queue.function.scattergather.GatherFunction import org.broadinstitute.sting.queue.function.{RetryMemoryLimit, QFunction} import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor +import org.broadinstitute.sting.queue.util.ClassFieldCache /** * Merges a vcf text file. 
@@ -46,10 +47,10 @@ class VcfGatherFunction extends CombineVariants with GatherFunction with RetryMe // NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor // are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK - val noHeader = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.NO_HEADER_ARG_NAME) + val noHeader = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.NO_HEADER_ARG_NAME) this.no_cmdline_in_header = originalGATK.getFieldValue(noHeader).asInstanceOf[Boolean] - val sitesOnly = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME) + val sitesOnly = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME) this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean] // ensure that the gather function receives the same unsafe parameter as the scattered function diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala new file mode 100644 index 000000000..75e9300dc --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/9/12 + * Time: 5:59 PM + * To change this template use File | Settings | File Templates. + */ +class CalculateHsMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateHsMetrics" + javaMainClass = "net.sf.picard.sam.CalculateHsMetrics" + + @Input(doc="The input SAM or BAM files to analyze. 
Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Interval list with targets", shortName = "targets", fullName = "target_list", required = true) + var targets: File = _ + + @Argument(doc="Interval list with baits", shortName = "baits", fullName = "bait_list", required = true) + var baits: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + /* + @Argument(doc = "Maximum number of file handles to keep open when spilling read ends to disk. Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system.", shortName = "max_file_handles", fullName ="max_file_handles_for_read_ends_maps", required=false) + var MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: Int = -1; + + @Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. 
If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false) + var SORTING_COLLECTION_SIZE_RATIO: Double = -1 + */ + override def freezeFieldValues() { + super.freezeFieldValues() +// if (outputIndex == null && output != null) + // outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + + val level = "SAMPLE" + + override def inputBams = input + override def outputBam = output + //this.sortOrder = null + //this.createIndex = Some(true) + override def commandLine = super.commandLine + + required("BAIT_INTERVALS=" + baits) + + required("TARGET_INTERVALS=" + targets) + + required("REFERENCE_SEQUENCE=" + reference) + + optional("METRIC_ACCUMULATION_LEVEL="+level)/*+ + conditional(REMOVE_DUPLICATES, "REMOVE_DUPLICATES=true") + + conditional(MAX_FILE_HANDLES_FOR_READ_ENDS_MAP > 0, "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=" + MAX_FILE_HANDLES_FOR_READ_ENDS_MAP.toString) + + conditional(SORTING_COLLECTION_SIZE_RATIO > 0, "SORTING_COLLECTION_SIZE_RATIO=" + SORTING_COLLECTION_SIZE_RATIO.toString) */ + + +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala new file mode 100644 index 000000000..de2b0af9e --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala @@ -0,0 +1,32 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. 
+ */ +class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateGcMetrics" + javaMainClass = "net.sf.picard.sam.CalculateGcMetrics" + + @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("CHART_OUTPUT=" + output+".pdf") + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala new file mode 100644 index 000000000..a9af4e858 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala @@ -0,0 +1,36 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. + */ +class CollectMultipleMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction{ + analysisName = "CalculateMultipleMetrics" + javaMainClass = "net.sf.picard.sam.CalculateMultipleMetrics" + + @Input(doc="The input SAM or BAM files to analyze. 
Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") + + required("PROGRAM=QualityScoreDistribution") + + required("PROGRAM=MeanQualityByCycle") + + required("PROGRAM=CollectAlignmentSummaryMetrics" ) + + +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala index aae846534..3849b976a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala @@ -28,7 +28,6 @@ import java.io.File import java.lang.annotation.Annotation import org.broadinstitute.sting.commandline._ import org.broadinstitute.sting.queue.{QException, QSettings} -import collection.JavaConversions._ import java.lang.IllegalStateException import org.broadinstitute.sting.queue.util._ import org.broadinstitute.sting.utils.io.IOUtils @@ -194,13 +193,13 @@ trait QFunction extends Logging with QJobReport { def failOutputs: Seq[File] = statusPrefixes.map(path => new File(path + ".fail")) /** The complete list of fields on this CommandLineFunction. */ - def functionFields = QFunction.classFields(this.functionFieldClass).functionFields + def functionFields: Seq[ArgumentSource] = ClassFieldCache.classFunctionFields(this.functionFieldClass) /** The @Input fields on this CommandLineFunction. 
*/ - def inputFields = QFunction.classFields(this.functionFieldClass).inputFields + def inputFields: Seq[ArgumentSource] = ClassFieldCache.classInputFields(this.functionFieldClass) /** The @Output fields on this CommandLineFunction. */ - def outputFields = QFunction.classFields(this.functionFieldClass).outputFields + def outputFields: Seq[ArgumentSource] = ClassFieldCache.classOutputFields(this.functionFieldClass) /** The @Argument fields on this CommandLineFunction. */ - def argumentFields = QFunction.classFields(this.functionFieldClass).argumentFields + def argumentFields: Seq[ArgumentSource] = ClassFieldCache.classArgumentFields(this.functionFieldClass) /** * Returns the class that should be used for looking up fields. @@ -475,79 +474,12 @@ trait QFunction extends Logging with QJobReport { * @param source Field to get the value for. * @return value of the field. */ - def getFieldValue(source: ArgumentSource) = ReflectionUtils.getValue(invokeObj(source), source.field) + def getFieldValue(source: ArgumentSource) = ClassFieldCache.getFieldValue(this, source) /** * Gets the value of a field. * @param source Field to set the value for. * @return value of the field. */ - def setFieldValue(source: ArgumentSource, value: Any) = ReflectionUtils.setValue(invokeObj(source), source.field, value) - - /** - * Walks gets the fields in this object or any collections in that object - * recursively to find the object holding the field to be retrieved or set. - * @param source Field find the invoke object for. - * @return Object to invoke the field on. - */ - private def invokeObj(source: ArgumentSource) = source.parentFields.foldLeft[AnyRef](this)(ReflectionUtils.getValue(_, _)) -} - -object QFunction { - var parsingEngine: ParsingEngine = _ - - /** - * The list of fields defined on a class - * @param clazz The class to lookup fields. - */ - private class ClassFields(clazz: Class[_]) { - /** The complete list of fields on this CommandLineFunction. 
*/ - val functionFields: Seq[ArgumentSource] = parsingEngine.extractArgumentSources(clazz).toSeq - /** The @Input fields on this CommandLineFunction. */ - val inputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input])) - /** The @Output fields on this CommandLineFunction. */ - val outputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Output])) - /** The @Argument fields on this CommandLineFunction. */ - val argumentFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument])) - } - - /** - * The mapping from class to fields. - */ - private var classFieldsMap = Map.empty[Class[_], ClassFields] - - /** - * Returns the field on clazz. - * @param clazz Class to search. - * @param name Name of the field to return. - * @return Argument source for the field. - */ - def findField(clazz: Class[_], name: String) = { - classFields(clazz).functionFields.find(_.field.getName == name) match { - case Some(source) => source - case None => throw new QException("Could not find a field on class %s with name %s".format(clazz, name)) - } - } - - /** - * Returns the fields for a class. - * @param clazz Class to retrieve fields for. - * @return the fields for the class. - */ - private def classFields(clazz: Class[_]) = { - classFieldsMap.get(clazz) match { - case Some(classFields) => classFields - case None => - val classFields = new ClassFields(clazz) - classFieldsMap += clazz -> classFields - classFields - } - } - - /** - * Returns the Seq of fields for a QFunction class. - * @param clazz Class to retrieve fields for. - * @return the fields of the class. 
- */ - def classFunctionFields(clazz: Class[_]) = classFields(clazz).functionFields + def setFieldValue(source: ArgumentSource, value: Any) = ClassFieldCache.setFieldValue(this, source, value) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala index 686188e72..91cacbb71 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala @@ -25,13 +25,14 @@ package org.broadinstitute.sting.queue.function.scattergather import org.broadinstitute.sting.commandline.ArgumentSource -import org.broadinstitute.sting.queue.function.{QFunction, CommandLineFunction} +import org.broadinstitute.sting.queue.function.CommandLineFunction +import org.broadinstitute.sting.queue.util.ClassFieldCache /** * Shadow clones another command line function. 
*/ object CloneFunction { - private lazy val cloneFunctionFields = QFunction.classFunctionFields(classOf[CloneFunction]) + private lazy val cloneFunctionFields = ClassFieldCache.classFunctionFields(classOf[CloneFunction]) } class CloneFunction extends CommandLineFunction { @@ -76,7 +77,7 @@ class CloneFunction extends CommandLineFunction { def commandLine = withScatterPart(() => originalFunction.commandLine) def getFieldValue(field: String): AnyRef = { - val source = QFunction.findField(originalFunction.getClass, field) + val source = ClassFieldCache.findField(originalFunction.getClass, field) getFieldValue(source) } @@ -98,7 +99,7 @@ class CloneFunction extends CommandLineFunction { } def setFieldValue(field: String, value: Any) { - val source = QFunction.findField(originalFunction.getClass, field) + val source = ClassFieldCache.findField(originalFunction.getClass, field) setFieldValue(source, value) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala b/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala new file mode 100644 index 000000000..870dd5617 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala @@ -0,0 +1,183 @@ +package org.broadinstitute.sting.queue.util + +import org.broadinstitute.sting.commandline._ +import scala.Some +import org.broadinstitute.sting.queue.QException +import collection.JavaConversions._ +import java.io.File + +/** + * Utilities and a static cache of argument fields for various classes populated by the parsingEngine. + * Because this class works with the ParsingEngine it can walk @ArgumentCollection hierarchies. + */ +object ClassFieldCache { + var parsingEngine: ParsingEngine = _ + + + // + // Field caching + // + + /** + * The list of fields defined on a class + * @param clazz The class to lookup fields. + */ + private class ClassFields(clazz: Class[_]) { + /** The complete list of fields on this CommandLineFunction. 
*/ + val functionFields: Seq[ArgumentSource] = parsingEngine.extractArgumentSources(clazz).toSeq + /** The @Input fields on this CommandLineFunction. */ + val inputFields: Seq[ArgumentSource] = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input])) + /** The @Output fields on this CommandLineFunction. */ + val outputFields: Seq[ArgumentSource] = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Output])) + /** The @Argument fields on this CommandLineFunction. */ + val argumentFields: Seq[ArgumentSource] = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument])) + } + + /** + * The mapping from class to fields. + */ + private var classFieldsMap = Map.empty[Class[_], ClassFields] + + /** + * Returns the fields for a class. + * @param clazz Class to retrieve fields for. + * @return the fields for the class. + */ + private def classFields(clazz: Class[_]): ClassFields = { + classFieldsMap.get(clazz) match { + case Some(classFields) => classFields + case None => + val classFields = new ClassFields(clazz) + classFieldsMap += clazz -> classFields + classFields + } + } + + /** + * Returns the field on clazz. + * @param clazz Class to search. + * @param name Name of the field to return. + * @return Argument source for the field. + */ + def findField(clazz: Class[_], name: String): ArgumentSource = { + classFields(clazz).functionFields.find(_.field.getName == name) match { + case Some(source) => source + case None => throw new QException("Could not find a field on class %s with name %s".format(clazz, name)) + } + } + + /** + * Returns the Seq of fields for a QFunction class. + * @param clazz Class to retrieve fields for. + * @return the fields of the class. + */ + def classFunctionFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).functionFields + + /** + * Returns the Seq of inputs for a QFunction class. + * @param clazz Class to retrieve inputs for. 
+ * @return the inputs of the class. + */ + def classInputFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).inputFields + + /** + * Returns the Seq of outputs for a QFunction class. + * @param clazz Class to retrieve outputs for. + * @return the outputs of the class. + */ + def classOutputFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).outputFields + + /** + * Returns the Seq of arguments for a QFunction class. + * @param clazz Class to retrieve arguments for. + * @return the arguments of the class. + */ + def classArgumentFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).argumentFields + + + // + // get/set fields as AnyRef + // + + /** + * Gets the value of a field. + * @param obj Top level object storing the source info. + * @param source Field to get the value for. + * @return value of the field. + */ + def getFieldValue(obj: AnyRef, source: ArgumentSource) = ReflectionUtils.getValue(invokeObj(obj, source), source.field) + + /** + * Sets the value of a field. + * @param obj Top level object storing the source info. + * @param source Field to set the value for. + * @return value of the field. + */ + def setFieldValue(obj: AnyRef, source: ArgumentSource, value: Any) = ReflectionUtils.setValue(invokeObj(obj, source), source.field, value) + + /** + * Walks the fields in this object or any collections in that object + * recursively to find the object holding the field to be retrieved or set. + * @param obj Top level object storing the source info. + * @param source Field to find the invoke object for. + * @return Object to invoke the field on. + */ + private def invokeObj(obj: AnyRef, source: ArgumentSource) = source.parentFields.foldLeft[AnyRef](obj)(ReflectionUtils.getValue(_, _)) + + + // + // get/set fields as java.io.File + // + + /** + * Gets the files from the fields. The fields must be a File, a FileExtension, or a Seq or Set of either. + * @param obj Top level object storing the source info. 
+ * @param fields Fields to get files. + * @return Distinct files for the fields. + */ + def getFieldFiles(obj: AnyRef, fields: Seq[ArgumentSource]): Seq[File] = { + var files: Seq[File] = Nil + for (field <- fields) + files ++= getFieldFiles(obj, field) + files.distinct + } + + /** + * Gets the files from the field. The field must be a File, a FileExtension, or a Seq or Set of either. + * @param obj Top level object storing the source info. + * @param field Field to get files. + * @return Distinct non-null files for the field. + */ + def getFieldFiles(obj: AnyRef, field: ArgumentSource): Seq[File] = { + var files: Seq[File] = Nil + CollectionUtils.foreach(getFieldValue(obj, field), (fieldValue) => { + val file = fieldValueToFile(field, fieldValue) + if (file != null) + files :+= file + }) + files.distinct + } + + /** + * Gets the file from the field. The field must be a File or a FileExtension and not a Seq or Set. + * @param obj Top level object storing the source info. + * @param field Field to get the file. + * @return File for the field, or null if the field value is null. + */ + def getFieldFile(obj: AnyRef, field: ArgumentSource): File = + fieldValueToFile(field, getFieldValue(obj, field)) + + /** + * Converts the field value to a file. The field must be a File or a FileExtension. + * @param field Field to get the file. + * @param value Value of the File or FileExtension or null. + * @return Null if value is null, otherwise the File. + * @throws QException if the value is not a File or FileExtension. + */ + private def fieldValueToFile(field: ArgumentSource, value: Any): File = value match { + case file: File => file + case null => null + case unknown => throw new QException("Non-file found. 
Try removing the annotation, change the annotation to @Argument, or extend File with FileExtension: %s: %s".format(field.field, unknown)) + } + +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 1529d9951..f684e533f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -57,7 +57,8 @@ object QScriptUtils { for (file <- fromFile(in).getLines()) if (!file.startsWith("#") && !file.isEmpty ) list :+= new File(file.trim()) - list.sortWith(_.compareTo(_) < 0) +// list.sortWith(_.compareTo(_) < 0) + list } /** diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala new file mode 100644 index 000000000..9d94975ba --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala @@ -0,0 +1,14 @@ +package org.broadinstitute.sting.queue.util + +import java.io.File +import org.broadinstitute.sting.utils.io.FileExtension + +/** + * An extension of java.io.File that can be pulled from or pushed to a remote location. + */ +trait RemoteFile extends File with FileExtension { + def pullToLocal() + def pushToRemote() + def deleteRemote() + def remoteDescription: String +} diff --git a/settings/ivysettings.xml b/settings/ivysettings.xml index e17342442..ce7667140 100644 --- a/settings/ivysettings.xml +++ b/settings/ivysettings.xml @@ -7,7 +7,6 @@ -