From 9e53c06328bf6b00034017819341b020b506e72d Mon Sep 17 00:00:00 2001 From: hanna Date: Sun, 20 Dec 2009 19:19:23 +0000 Subject: [PATCH] First revision of command-line argument support for GenotypeWriter. Also, fixed the damn build. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2416 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/CommandLineExecutable.java | 4 +- .../GenotypeWriterArgumentTypeDescriptor.java | 139 ++++++++++++++++++ .../walkers/ConcordanceTruthTableTest.java | 12 +- 3 files changed, 148 insertions(+), 7 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 36127ebbc..782ededa0 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; +import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterArgumentTypeDescriptor; import java.io.File; import java.io.FileNotFoundException; @@ -96,7 +97,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram { * @return A collection of type descriptors generating implementation-dependent placeholders. 
*/ protected Collection getArgumentTypeDescriptors() { - return Arrays.asList( new SAMFileReaderArgumentTypeDescriptor(GATKEngine), + return Arrays.asList( new GenotypeWriterArgumentTypeDescriptor(GATKEngine), + new SAMFileReaderArgumentTypeDescriptor(GATKEngine), new SAMFileWriterArgumentTypeDescriptor(GATKEngine), new OutputStreamArgumentTypeDescriptor(GATKEngine) ); } diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java new file mode 100644 index 000000000..2cf595a44 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java @@ -0,0 +1,139 @@ +package org.broadinstitute.sting.gatk.io.stubs; + +import org.broadinstitute.sting.utils.cmdLine.ArgumentTypeDescriptor; +import org.broadinstitute.sting.utils.cmdLine.ArgumentSource; +import org.broadinstitute.sting.utils.cmdLine.ArgumentMatches; +import org.broadinstitute.sting.utils.cmdLine.ArgumentDefinition; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; + +import java.io.File; +import java.util.List; +import java.util.Arrays; + +import net.sf.samtools.SAMFileReader; + +/** + * Injects new command-line arguments into the system providing support for the genotype writer. + * + * @author mhanna + * @version 0.1 + */ +public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { + /** + * The engine into which output stubs should be fed. + */ + private GenomeAnalysisEngine engine; + + /** + * Create a new GenotypeWriter argument, notifying the given engine when that argument has been created.
+ * @param engine Engine with which each GenotypeWriter stub built by this descriptor is registered. + */ + public GenotypeWriterArgumentTypeDescriptor(GenomeAnalysisEngine engine) { + this.engine = engine; + } + + /** + * Reports whether this ArgumentTypeDescriptor supports the given type. + * @param type The type to check. + * @return True only if the type is exactly GenotypeWriter.class. + */ + @Override + public boolean supports( Class type ) { + return GenotypeWriter.class.equals(type); + } + + /** + * Create the argument definitions associated with this source. + * Assumes that this type descriptor is relevant for this source. + * @param source Source class and field for the given argument. + * @return A two-element list: the genotype file definition followed by the genotype format definition. + */ + @Override + public List createArgumentDefinitions( ArgumentSource source ) { + return Arrays.asList( createGenotypeFileArgumentDefinition(source), + createGenotypeFormatArgumentDefinition(source) ); + } + + /** + * Convert the given argument matches into a single object suitable for feeding into the ArgumentSource. + * @param source Source for this argument. + * @param type Type of the target field; not consulted by this implementation. + * @param matches Matches to search for the genotype file and genotype format values. + * @return A GenotypeWriterStub for the matched file and format (VCF when no format is given), already added to the engine's outputs.
+ */ + @Override + public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { + String writerFileName = getArgumentValue(createGenotypeFileArgumentDefinition(source),matches); + if(writerFileName == null) + throw new StingException("Genotype format was supplied, but no file was supplied to contain the genotype info.."); + + String genotypeFormatText = getArgumentValue(createGenotypeFormatArgumentDefinition(source),matches); + GenotypeWriterFactory.GENOTYPE_FORMAT genotypeFormat = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; + if(genotypeFormatText != null) { + try { + genotypeFormat = Enum.valueOf(GenotypeWriterFactory.GENOTYPE_FORMAT.class,genotypeFormatText); + } + catch(IllegalArgumentException ex) { + throw new StingException(String.format("Genotype format %s is invalid.",genotypeFormatText)); + } + } + + GenotypeWriterStub stub = new GenotypeWriterStub(engine, new File(writerFileName),genotypeFormat); + + engine.addOutput(stub); + + return stub; + } + + /** + * Gets the definition of the argument naming the genotype output file. + * @param source Argument source for the genotype writer. Must not be null. + * @return Argument definition for the genotype output file. Will not be null. + */ + private ArgumentDefinition createGenotypeFileArgumentDefinition(ArgumentSource source) { + Argument description = this.getArgumentDescription(source); + + boolean isFullNameProvided = description.fullName().trim().length() > 0; + boolean isShortNameProvided = description.shortName().trim().length() > 0; + + String fullName = isFullNameProvided ? description.fullName().trim() : "variants_out"; + + // If the short name is provided, use that. If the user hasn't provided any names at all, use + // the default ("varout"). If only a full name was provided, leave the short name null.
+ String shortName; + if( isShortNameProvided ) + shortName = description.shortName().trim(); + else if( !isFullNameProvided ) + shortName = "varout"; + else + shortName = null; + + return new ArgumentDefinition( source, + fullName, + shortName, + getDoc(source), + isRequired(source), + getExclusiveOf(source), + getValidationRegex(source) ); + } + + /** + * Creates the optional argument selecting the genotype output format. + * @param source Argument source for the genotype writer. Must not be null. + * @return Argument definition for the genotype output format. Will not be null. + */ + private ArgumentDefinition createGenotypeFormatArgumentDefinition(ArgumentSource source) { + return new ArgumentDefinition( source, + "variant_output_format", + "vf", + "Format to be used to represent variants; default is VCF", + false, + null, + null ); + } + +} diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/ConcordanceTruthTableTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/ConcordanceTruthTableTest.java index e3a94a471..08d32d71c 100755 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/ConcordanceTruthTableTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/ConcordanceTruthTableTest.java @@ -73,12 +73,12 @@ public class ConcordanceTruthTableTest extends BaseTest { List> homNoRef = new ArrayList>(1); homNoRef.add(new Pair(hom1,null)); - Pair> countShouldBeOne = ctt.getPooledAlleleFrequency(oneHet,'G'); - Pair> countShouldBeTwo = ctt.getPooledAlleleFrequency(oneHom,'G'); - Pair> countShouldBeFour = ctt.getPooledAlleleFrequency(twoHetOneHom,'G'); - Pair> countShouldBeSix = ctt.getPooledAlleleFrequency(twoHetTwoHom,'G'); - Pair> countShouldBeThree = ctt.getPooledAlleleFrequency(hetHomNoRef,'G'); - Pair> countShouldBeTwoHereToo = ctt.getPooledAlleleFrequency(homNoRef, 'G'); + Pair> countShouldBeOne = ctt.getPooledAlleleFrequency(oneHet,'G'); + Pair> countShouldBeTwo = ctt.getPooledAlleleFrequency(oneHom,'G'); +
Pair> countShouldBeFour = ctt.getPooledAlleleFrequency(twoHetOneHom,'G'); + Pair> countShouldBeSix = ctt.getPooledAlleleFrequency(twoHetTwoHom,'G'); + Pair> countShouldBeThree = ctt.getPooledAlleleFrequency(hetHomNoRef,'G'); + Pair> countShouldBeTwoHereToo = ctt.getPooledAlleleFrequency(homNoRef, 'G'); int expecChips = 4+4+6+7+2+1; int numChips = countShouldBeOne.getSecond().getSecond() + countShouldBeTwo.getSecond().getSecond() +