Should have checked in

This commit is contained in:
Mark DePristo 2011-08-19 14:35:16 -04:00
parent e2c066ac1e
commit 49e831a13b
2 changed files with 174 additions and 1 deletions

View File

@ -51,8 +51,66 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* This ReadWalker provides simple, yet powerful read clipping capabilities. It allows the user to clip bases in reads
* This tool provides simple, powerful read clipping capabilities.
*
* <p>
* It allows the user to clip bases in reads
* with poor quality scores, that match particular sequences, or that were generated by particular machine cycles.
* </p>
*
* <h2>Input</h2>
* <p>
* A BAM file containing the reads to be clipped.
* </p>
*
* <h2>Output</h2>
* <p>
* <ul>
* <li>-o: an OutputFormatted (recommended BED) file with the callable status covering each base</li>
* <li>-summary: a table of callable status x count of all examined bases</li>
* </ul>
* </p>
*
* <h2>Examples</h2>
* <pre>
* -T CallableLociWalker \
* -I my.bam \
* -summary my.summary \
* -o my.bed
* </pre>
*
* would produce a BED file (my.bed) that looks like:
*
* <pre>
* 20 10000000 10000864 CALLABLE
* 20 10000865 10000985 POOR_MAPPING_QUALITY
* 20 10000986 10001138 CALLABLE
* 20 10001139 10001254 POOR_MAPPING_QUALITY
* 20 10001255 10012255 CALLABLE
* 20 10012256 10012259 POOR_MAPPING_QUALITY
* 20 10012260 10012263 CALLABLE
* 20 10012264 10012328 POOR_MAPPING_QUALITY
* 20 10012329 10012550 CALLABLE
* 20 10012551 10012551 LOW_COVERAGE
* 20 10012552 10012554 CALLABLE
* 20 10012555 10012557 LOW_COVERAGE
* 20 10012558 10012558 CALLABLE
* et cetera...
* </pre>
* as well as a summary table that looks like:
*
* <pre>
* state nBases
* REF_N 0
* CALLABLE 996046
* NO_COVERAGE 121
* LOW_COVERAGE 928
* EXCESSIVE_COVERAGE 0
* POOR_MAPPING_QUALITY 2906
* </pre>
*
* @author Mark DePristo
* @since May 7, 2010
*/
@Requires({DataSource.READS})
public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithData, ClipReadsWalker.ClippingData> {

View File

@ -0,0 +1,115 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Summary test
*
* <p>Body test</p>
*/
public class DocumentationTest extends RodWalker<Integer, Integer> {
    // NOTE(review): presumably a fixture for the argument-documentation generator: the fields
    // below cover a spread of annotation kinds (@ArgumentCollection, @Input, @Output, @Argument,
    // @Advanced, @Hidden), types, and default-value situations. Confirm before changing any
    // annotation text, Javadoc comment, field type or initializer, since those may be exactly
    // what is being inspected. Explanatory notes are written as line comments on purpose so
    // they are not picked up as Javadoc.

    // the docs for the arguments are in the collection
    @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

    /**
    * dbSNP comparison VCF. By default, the dbSNP file is used to specify the set of "known" variants.
    * Other sets can be specified with the -knownName (--known_names) argument.
    */
    @ArgumentCollection
    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();

    // --- RodBinding inputs: list with an empty-list default, no initializer, explicit null ---

    /**
    * detailed documentation about the argument goes here.
    */
    @Input(fullName="listofRodBinding", shortName = "disc", doc="Output variants that were not called in this Feature comparison track", required=false)
    private List<RodBinding<VariantContext>> listOfRodBinding = Collections.emptyList();

    // Optional binding declared without an initializer.
    @Input(fullName="optionalRodBinding", shortName = "conc", doc="Output variants that were also called in this Feature comparison track", required=false)
    private RodBinding<VariantContext> concordanceTrack;

    // Same shape as above, but with identical full and short names.
    @Input(fullName="optionalRodBindingWithoutDefault", shortName = "optionalRodBindingWithoutDefault", doc="Output variants that were also called in this Feature comparison track", required=false)
    private RodBinding<VariantContext> noDefaultOptionalRodBinding;

    // Optional binding explicitly initialized to null (as opposed to left uninitialized).
    @Input(fullName="optionalRodBindingWithoutDefaultNull", shortName = "shortTest", doc="Output variants that were also called in this Feature comparison track", required=false)
    private RodBinding<VariantContext> noDefaultOptionalRodBindingNull = null;

    // Binding parameterized on Feature rather than VariantContext.
    @Input(fullName="featureArg", shortName = "featureArg", doc="A RodBinding of feature", required=false)
    private RodBinding<Feature> featureArg = null;

    // The only required argument declared directly in this class.
    @Output(doc="VCFWriter",required=true)
    protected VCFWriter vcfWriter = null;

    // --- Collection-typed arguments: uninitialized set, initialized set, initialized list ---

    // Left without an initializer; contrast with setStringInitialized below.
    @Advanced
    @Argument(fullName="setString", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
    public Set<String> sampleNames;

    @Argument(fullName="setStringInitialized", shortName="setStringInitialized", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
    public Set<String> setStringInitialized = new HashSet<String>();

    // Declares only a shortName; no fullName is given in the annotation.
    @Argument(shortName="optionalArgWithMissinglessDefault", doc="One or more criteria to use when selecting the data. Evaluated *after* the specified samples are extracted and the INFO-field annotations are updated.", required=false)
    public ArrayList<String> SELECT_EXPRESSIONS = new ArrayList<String>();

    // --- Scalar, array and enum arguments ---

    // NOTE(review): the name "AAAAA" together with the doc string suggests this checks
    // name-based ordering of arguments - confirm against the consumer of this class.
    @Argument(shortName="AAAAA", fullName = "AAAAA", doc="Should be the first argument", required=false)
    public boolean FIRST_ARG = false;

    @Advanced
    @Argument(fullName="booleanArg", shortName="env", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false)
    private boolean EXCLUDE_NON_VARIANTS = false;

    @Advanced
    @Argument(fullName="booleanArray", shortName="booleanArray", doc="x", required=false)
    private boolean[] boolArray = null;

    // Enum-typed argument whose default is the second constant.
    @Argument(fullName="enumTest", shortName="enumTest", doc="Test enum", required=false)
    private TestEnum TestEnumArg = TestEnum.ENUM2;

    public enum TestEnum {
        /** Docs for enum1 */
        ENUM1,
        /** Docs for enum2 */
        ENUM2
    }

    // Argument marked @Hidden.
    @Hidden
    @Argument(fullName="hiddenArg", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false)
    private boolean KEEP_AF_SPECTRUM = false;

    // --- Traversal callbacks: minimal bodies; @Override lets the compiler verify that each
    // --- one still matches a superclass method.

    // Ignores all three inputs and contributes 0.
    @Override
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return 0; }

    // Initial accumulator value is 0.
    @Override
    public Integer reduceInit() { return 0; }

    // Adds the mapped value to the running sum.
    @Override
    public Integer reduce(Integer value, Integer sum) { return value + sum; }

    // Nothing is done with the final result.
    @Override
    public void onTraversalDone(Integer result) { }
}