Added category docs for the remaining public walkers (I think I got them all). I removed a couple of totally unnecessary walkers.

This commit is contained in:
Eric Banks 2012-07-25 21:40:28 -04:00
parent 2982b24c4b
commit 7eb3f54750
31 changed files with 98 additions and 701 deletions

View File

@ -47,6 +47,7 @@ import org.broadinstitute.sting.utils.GenomeLocComparator;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -82,6 +83,7 @@ import java.util.*;
* </pre>
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.INTERVAL)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {

View File

@ -1,115 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.HashSet;
import java.util.Set;
/**
 * Writes, in SAM/BAM format, only those reads of the input data set whose read name is listed in a
 * user-supplied file (the readNamesToKeep argument). Reads whose name is not listed are rejected by
 * {@code filter}; the reads that remain are passed through unchanged and added to the output writer.
 */
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE})
public class FindReadsWithNames extends ReadWalker<SAMRecord, SAMFileWriter> {
    /** Writer that receives every read accepted by the name filter. */
    @Output(doc="Write output to this BAM filename instead of STDOUT")
    SAMFileWriter out;

    /** File listing the read names to keep (presumably one name per line -- confirm against XReadLines). */
    @Argument(fullName = "readNamesToKeep", shortName = "rn", doc="names to keep", required = true)
    File readNamesFile = null;

    /** Names loaded from {@code readNamesFile}; filled in by {@code initialize()}. */
    Set<String> namesToKeep;

    /**
     * Loads the names to keep from {@code readNamesFile} into {@code namesToKeep}.
     *
     * @throws UserException.CouldNotReadInputFile if the names file cannot be found
     */
    public void initialize() {
        try {
            final XReadLines nameSource = new XReadLines(readNamesFile);
            namesToKeep = new HashSet<String>(nameSource.readLines());
        } catch (FileNotFoundException missing) {
            throw new UserException.CouldNotReadInputFile(readNamesFile, missing);
        }
    }

    /**
     * Decides whether a read is kept, based solely on its name.
     *
     * @param ref  the reference context for the read; not consulted
     * @param read the read being tested
     * @return true if the read's name is in the set of names to keep, false otherwise
     */
    public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
        final String readName = read.getReadName();
        return namesToKeep.contains(readName);
    }

    /**
     * Identity map: a read that passed the filter is handed to reduce as-is.
     *
     * @param ref             the reference context for the read; not consulted
     * @param read            the read that passed the filter
     * @param metaDataTracker the metadata tracker for the read; not consulted
     * @return the same read
     */
    public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
        return read;
    }

    /**
     * Provides the initial reduce value, which is the output writer itself.
     *
     * @return the writer held in the {@code out} field
     */
    public SAMFileWriter reduceInit() {
        return out;
    }

    /**
     * Emits one read to the output writer.
     *
     * @param read   the read to emit
     * @param output the writer to add the read to
     * @return the same writer, so the next reduce call emits to the same destination
     */
    public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
        output.addAlignment(read);
        return output;
    }
}

View File

@ -1,8 +1,10 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
@ -41,6 +43,7 @@ import java.text.NumberFormat;
* reads with QC failure flag set, number of duplicates, percentage mapped, etc.
* @author aaron
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS})
public class FlagStat extends ReadWalker<Integer, Integer> {
@Output

View File

@ -30,10 +30,12 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -60,6 +62,7 @@ import java.util.List;
* Associated command:
* samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] [-iscg] [-T theta] [-N nHap] [-r pairDiffRate] <in.alignment>
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
@Output
PrintStream out;

View File

@ -29,9 +29,11 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
@ -39,6 +41,7 @@ import java.io.PrintStream;
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method
* of the given ROD.
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class PrintRODs extends RodWalker<Integer, Integer> {
@Input(fullName="input", shortName = "input", doc="The input ROD which should be printed out.", required=true)
public RodBinding<Feature> input;

View File

@ -31,8 +31,10 @@ import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -45,6 +47,7 @@ import java.util.Map;
* Divides the input data set into separate BAM files, one for each sample in the input data set. The split
* files are named concatenating the sample name to the end of the provided outputRoot command-line argument.
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@WalkerName("SplitSamFile")
@Requires({DataSource.READS})
public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWriter>> {

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -39,6 +40,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -51,9 +53,10 @@ import java.util.Set;
/**
* Produces an input file to Beagle imputation engine, listing unphased, hard-called genotypes for a single sample
* in input variant file. Will additional hold back a fraction of the sites for evaluation, marking the
* in input variant file. Will additionally hold back a fraction of the sites for evaluation, marking the
* genotypes at those sites as missing, and writing the truth of these sites to a second VCF file
*/
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
public class VariantsToBeagleUnphased extends RodWalker<Integer, Integer> {
@Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;

View File

@ -27,12 +27,14 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
import java.util.Arrays;
@ -41,6 +43,7 @@ import java.util.List;
/**
* Test routine for new VariantContext object
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CompareCallableLoci extends RodWalker<List<CallableLoci.CallableBaseState>, long[][]> {
@Output
protected PrintStream out;

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.coverage;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -32,6 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
import java.util.List;
@ -60,6 +62,7 @@ import java.util.List;
* </pre>
*
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Allows(value = {DataSource.REFERENCE})
@Requires(value = {DataSource.REFERENCE})
@By(DataSource.REFERENCE)

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -10,6 +11,7 @@ import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -66,6 +68,7 @@ import java.io.PrintStream;
*
* @author Kiran Garimella, Mark DePristo
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
@Output PrintStream out;
@Argument(fullName="min_base_quality_score", shortName="mbq", doc="Minimum base quality required to consider a base for calling", required=false)

View File

@ -2,11 +2,13 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
@ -44,8 +46,7 @@ import java.util.List;
* @author Kiran Garimela
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class ReadLengthDistribution extends ReadWalker<Integer, Integer> {
@Output
public PrintStream out;

View File

@ -27,10 +27,12 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.File;
import java.io.PrintStream;
@ -131,6 +133,7 @@ import java.util.List;
* @author Mark DePristo
* @since 7/4/11
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class DiffObjects extends RodWalker<Integer, Integer> {
/**
* Writes out a file of the DiffEngine format:

View File

@ -29,10 +29,12 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -65,6 +67,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
* </pre>
*
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
@Output(required=false, doc="Output bam")

View File

@ -1,19 +1,16 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
*
* <p>
* Can also count the number of reads matching a given criterion using read filters (see the
* --read-filter command line argument). Simplest example of a read-backed analysis.
*
* Walks over the input data set, calculating the number of bases seen for diagnostic purposes.
*
* <h2>Input</h2>
* <p>
@ -22,20 +19,21 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
*
* <h2>Output</h2>
* <p>
* Number of reads seen.
* Number of bases seen.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T CountReads \
* -T CountBases \
* -o output.txt \
* -I input.bam \
* [-L input.intervals]
* </pre>
*
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountBases extends ReadWalker<Integer, Long> {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {

View File

@ -5,12 +5,14 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
import java.util.Collections;
@ -21,6 +23,7 @@ import java.util.List;
* very useful since overlapping intervals get merged, so you can count the number of intervals the GATK merges down to.
* This was its very first use.
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CountIntervals extends RefWalker<Long, Long> {
@Output
PrintStream out;

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Gender;
@ -31,13 +32,13 @@ import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
* Can also count the number of reads matching a given criterion using read filters (see the
* --read-filter command line argument). Simplest example of a read-backed analysis.
* Walks over the input data set, calculating the number of reads seen from male samples for diagnostic purposes.
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountMales extends ReadWalker<Integer, Integer> {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {

View File

@ -29,12 +29,14 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.Collections;
import java.util.List;
@ -63,6 +65,7 @@ import java.util.List;
* </pre>
*
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CountRODsByRef extends RefWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> {
/**

View File

@ -2,12 +2,14 @@ package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -17,7 +19,7 @@ import java.util.HashMap;
import java.util.Map;
/**
* Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips
* Walks over the input data set, counting the number of read events (from the CIGAR operator)
*
* <h2>Input</h2>
* <p>
@ -26,21 +28,20 @@ import java.util.Map;
*
* <h2>Output</h2>
* <p>
* Number of reads ending in each category.
* </p>
* Number of read events for each category
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T ReadEndIndels \
* -T CountReadEvents \
* -o output.grp \
* -I input.bam \
* [-L input.intervals]
* </pre>
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountReadEvents extends ReadWalker<Map<CigarOperator, ArrayList<Integer>> , Map<Integer, Map<CigarOperator, Long>>> {
@Output (doc = "GATKReport table output")

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
@ -9,6 +10,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List;
@ -30,12 +32,13 @@ import java.util.List;
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T ReadEndIndels \
* -T CountTerminusEvent \
* -o output.txt \
* -I input.bam \
* [-L input.intervals]
* </pre>
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountTerminusEvent extends ReadWalker<Pair<Long, Long>, Pair<Long, Long>> {
public Pair<Long, Long> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {

View File

@ -1,434 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.collections.PrimitivePair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.*;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: asivache
* Date: Apr 9, 2010
* Time: 12:16:41 PM
* To change this template use File | Settings | File Templates.
*/
/**
* Walks over the input reads, accumulating base quality statistics per sequencing cycle, grouped by
* platform unit (lane) and by library, and reports them once the traversal is done. Unmapped reads
* are skipped and do not contribute to the statistics or to the count of reads analyzed.
*/
@Requires({DataSource.READS})
public class CycleQuality extends ReadWalker<Integer,Integer> {
@Output
protected PrintStream out;
@Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+
"on mapped reads only, while unmapped reads will be discarded", required=false)
protected boolean MAPPED_ONLY = true;
@Argument(fullName="maxReadLength", shortName="rl", doc="maximum read length", required=false)
protected int MAX_READ_LENGTH = 500;
@Argument(fullName="out_prefix",shortName="p",doc="prefix for output report and statistics files",required=true)
protected String PREFIX = null;
// @Argument(fullName="html",shortName="html",doc="produce html-formatted output (starting with h3-level tags) rather than plain text",required=false)
protected boolean HTML = false;
@Argument(fullName="qualThreshold", shortName="Q",doc="flag as problematic all cycles with av. qualities below the threshold (applies only to the generated report)",required=false)
protected double QTHRESHOLD = 10.0;
@Argument(fullName="useBothQualities",shortName="bothQ",required=false,doc="Generate statistics both for currently set and for "+
"original base qualities (OQ tag, must be present in the bam); two separate data files will be generated.")
protected boolean ASSESS_BOTH_QUALS = false;
private Map<String,CycleStats[]> cyclesByLaneMap = null;
private Map<String,CycleStats[]> cyclesByLibraryMap = null;
private Map<String,CycleStats[]> cyclesByLaneMapOrig = null;
private Map<String,CycleStats[]> cyclesByLibraryMapOrig = null;
public void initialize() {
    // Without a prefix there is no name to give the output files, so fail before doing any work.
    if ( PREFIX == null ) {
        throw new ReviewedStingException("Prefix for output file(s) must be specified");
    }

    // Start every accumulator empty: one pair of maps for the current qualities,
    // one pair (the *Orig maps) alongside them.
    cyclesByLaneMap        = new HashMap<String,CycleStats[]>();
    cyclesByLibraryMap     = new HashMap<String,CycleStats[]>();
    cyclesByLaneMapOrig    = new HashMap<String,CycleStats[]>();
    cyclesByLibraryMapOrig = new HashMap<String,CycleStats[]>();
}
/**
 * Adds the base qualities of one mapped read to the per-lane and per-library cycle statistics.
 *
 * @param ref             the reference context for the read; not consulted
 * @param read            the read to account for
 * @param metaDataTracker the metadata tracker for the read; not consulted
 * @return 0 if the read is unmapped and was skipped, 1 if it was added to the statistics
 * @throws UserException.ReadMissingReadGroup if the read has no read group
 * @throws UserException.MalformedBAM if the read group lacks a platform unit or library, or if a
 *         paired read is flagged as both (or neither) first and second of pair
 */
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
    if ( AlignmentUtils.isReadUnmapped(read) ) {
        return 0;
    }

    final SAMReadGroupRecord rg = read.getReadGroup();
    if ( rg == null ) {
        throw new UserException.ReadMissingReadGroup(read);
    }

    final String lane = rg.getPlatformUnit();
    final String library = rg.getLibrary();
    if ( lane == null ) {
        throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no platform unit information");
    }
    if ( library == null ) {
        throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no library information");
    }

    // 0 = unpaired read, 1 = first of pair, 2 = second of pair
    int pairEnd = 0;
    if ( read.getReadPairedFlag() ) {
        final boolean isFirst = read.getFirstOfPairFlag();
        final boolean isSecond = read.getSecondOfPairFlag();
        // a paired read must carry exactly one of the two flags
        if ( isFirst == isSecond ) {
            throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
        }
        pairEnd = isFirst ? 1 : 2;
    }

    CycleStats[] laneStats = cyclesByLaneMap.get(lane);
    CycleStats[] libraryStats = cyclesByLibraryMap.get(library);

    final byte[] quals = AlignmentUtils.getQualsInCycleOrder(read);

    // A lane first seen through an unpaired read gets a single slot; otherwise one slot per end so
    // the two ends of a pair are tracked independently. Libraries always get two slots.
    // NOTE(review): the lane array is sized from the first read seen for that lane -- a later paired
    // second-of-pair read on a lane first seen as unpaired would index past the array; confirm
    // whether mixed lanes can occur.
    if ( laneStats == null ) {
        laneStats = new CycleStats[pairEnd == 0 ? 1 : 2];
        cyclesByLaneMap.put(lane, laneStats);
    }
    if ( libraryStats == null ) {
        libraryStats = new CycleStats[2];
        cyclesByLibraryMap.put(library, libraryStats);
    }

    // turn the pair end (0, 1 or 2) into an array index (0, 0 or 1)
    final int slot = (pairEnd == 0) ? 0 : pairEnd - 1;

    if ( laneStats[slot] == null ) {
        laneStats[slot] = new CycleStats(MAX_READ_LENGTH);
    }
    if ( libraryStats[slot] == null ) {
        libraryStats[slot] = new CycleStats(MAX_READ_LENGTH);
    }

    laneStats[slot].add(quals);
    libraryStats[slot].add(quals);

    return 1;
}
/**
 * Supplies the starting value of the read counter accumulated by reduce.
 *
 * @return zero, i.e. no reads counted yet.
 */
public Integer reduceInit() {
    return Integer.valueOf(0);
}
/**
 * Folds the result of one map call into the running total.
 *
 * @param value result of the map (0 or 1).
 * @param sum   total accumulated so far.
 * @return the new total, {@code sum + value}.
 */
public Integer reduce(Integer value, Integer sum) {
    final int accumulated = sum.intValue();
    final int mapped = value.intValue();
    return accumulated + mapped;
}
/**
 * Prints the final summary to the output stream and writes the two per-cycle tables:
 * PREFIX.byLane.txt (by platform unit) and PREFIX.byLibrary.txt (by library).
 * When HTML is set, extra markup lines are interleaved with the text output.
 *
 * @param result total number of reads counted by the traversal
 */
public void onTraversalDone(Integer result) {
    final File laneReport = new File(PREFIX + ".byLane.txt");
    final File libraryReport = new File(PREFIX + ".byLibrary.txt");

    // header: title and list of analyzed files (HTML mode only)
    if ( HTML ) {
        out.println("<h3>Cycle Quality QC</h3>\n");
        out.println("File(s) analyzed: <br>");
        for ( String fileName : getToolkit().getArguments().samFiles ) {
            out.println(fileName + "<br>");
        }
        out.println("<br>");
        out.println("<br><br>");
    }

    out.println("\n" + result + " reads analyzed\n");
    if ( HTML ) {
        out.println("<br><br>");
    }

    // section 1: statistics grouped by platform unit
    out.println("by platform unit:");
    if ( HTML ) {
        out.println("<br>");
    }
    report2(cyclesByLaneMap, laneReport, true);
    out.println();
    if ( HTML ) {
        out.println("<br><br>");
    }

    // section 2: statistics grouped by library
    out.println("\nby library:");
    if ( HTML ) {
        out.println("<br>");
    }
    report2(cyclesByLibraryMap, libraryReport, true);
    out.println();
    if ( HTML ) {
        out.println("<br><br>");
    }
}
/**
 * Writes the per-cycle quality table for every key of {@code m} to {@code f} and prints a textual
 * summary followed by the runs of cycles whose average quality is below QTHRESHOLD.
 *
 * The table has one row per cycle (1-based) and, per key, an average/stddev column pair for each
 * read end that has data; cycles beyond the longest read of a column are written as ".".
 * Nothing is written to {@code f} when the map contains no reads at all.
 *
 * @param m             statistics per key; each value holds end-1/unpaired stats at [0] and, optionally,
 *                      end-2 stats at [1] (either slot may be null)
 * @param f             destination file for the tab-separated table
 * @param summaryReport when true, the "no reads" notes and the per-key name/"paired" heading lines are
 *                      printed; the read counts and the OUTCOME section are printed in any case
 * @throws UserException.CouldNotCreateOutputFile if the table cannot be written
 */
private void report2(Map<String,CycleStats[]> m, File f,boolean summaryReport) {
    long totalReads_1 = 0;
    long totalReads_2 = 0;
    long totalReads_unpaired = 0;
    SortedSet<String> columns = new TreeSet<String>();
    int maxLength = 0; // maximum read length across all lanes/read ends analyzed

    for ( Map.Entry<String,CycleStats[]> e : m.entrySet() ) {
        CycleStats[] data = e.getValue();
        // slot 0 stays null when only second-of-pair reads were seen for this key
        CycleStats end1 = data[0];
        CycleStats end2 = ( data.length > 1 ? data[1] : null );

        if ( end1 != null && end1.getMaxReadLength() > maxLength ) maxLength = end1.getMaxReadLength();
        if ( end2 == null ) {
            if ( end1 != null ) totalReads_unpaired += end1.getReadCount(); // single end lane
        } else {
            if ( end1 != null ) totalReads_1 += end1.getReadCount();
            totalReads_2 += end2.getReadCount();
            if ( end2.getMaxReadLength() > maxLength ) maxLength = end2.getMaxReadLength();
        }
        columns.add(e.getKey());
    }

    final boolean noReads = ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 );

    if ( summaryReport ) {
        if ( totalReads_1 == 0 && totalReads_2 != 0) {
            out.println(" End 1: No reads");
            if ( HTML ) out.println("<br>");
        }
        if ( totalReads_2 == 0 && totalReads_1 != 0 ) {
            out.println(" End 2: No reads");
            if ( HTML ) out.println("<br>");
        }
        if ( noReads ) {
            out.println(" No reads found.");
            if ( HTML ) out.println("<br>");
        }
    }
    if ( noReads ) return;

    BufferedWriter w = null;
    try {
        w = new BufferedWriter(new FileWriter(f));

        // ---- header row of the table, plus per-key read counts on the output stream ----
        w.write("cycle");
        for ( String col : columns ) {
            CycleStats[] data = m.get(col);
            CycleStats end1 = data[0];
            boolean paired = ( data.length > 1 && data[1] != null );

            if ( summaryReport ) {
                out.print(" ");
                out.print(col);
            }

            int minL = ( end1 == null ? 0 : end1.getMinReadLength() );
            int maxL = ( end1 == null ? 0 : end1.getMaxReadLength() );

            if ( paired ) {
                if ( summaryReport ) {
                    out.println(": paired");
                    if ( HTML ) out.println("<br>");
                    out.println(" Reads analyzed:");
                    if ( HTML ) out.println("<br>");
                }
                CycleStats end2 = data[1];
                out.print( " End 1: "+ ( end1 == null ? 0 : end1.getReadCount()) );
                printReadLengthSummary(minL, maxL);
                out.print( " End 2: "+ end2.getReadCount() );
                printReadLengthSummary(end2.getMinReadLength(), end2.getMaxReadLength());
            } else {
                out.println(": unpaired");
                if ( HTML ) out.println("<br>");
                out.print( " Reads analyzed: "+ ( end1 == null ? 0 : end1.getReadCount()) );
                printReadLengthSummary(minL, maxL);
            }

            w.write('\t') ;
            w.write(col);
            if ( paired ) {
                w.write(".end1");
                w.write('\t');
                w.write(col);
                w.write(".end1.stddev");
                w.write('\t') ;
                w.write(col);
                w.write(".end2");
                w.write('\t');
                w.write(col);
                w.write(".end2.stddev");
            } else {
                w.write(".unpaired");
                w.write('\t');
                w.write(col);
                w.write(".unpaired.stddev");
            }
        }
        w.write('\n');

        // ---- one table row per cycle; low-quality runs are collected along the way ----
        Map<String,List<PrimitivePair.Int>> problems = new HashMap<String,List<PrimitivePair.Int>>();
        for ( int cycle = 0 ; cycle < maxLength ; cycle++ ) {
            w.write(Integer.toString(cycle+1));
            for ( String col : columns ) {
                CycleStats[] data = m.get(col);
                CycleStats end1 = data[0];
                w.write('\t');
                if ( end1 == null || cycle >= end1.getMaxReadLength() ) w.write(".\t.");
                else {
                    double aq = end1.getCycleQualAverage(cycle);
                    w.write(String.format("%.4f\t%.4f",aq,end1.getCycleQualStdDev(cycle)));
                    recordProblem(aq,cycle, problems,col+".End1");
                }
                if ( data.length > 1 && data[1] != null ) {
                    w.write('\t');
                    CycleStats end2 = data[1];
                    if ( cycle >= end2.getMaxReadLength() ) w.write(".\t.");
                    else {
                        double aq = end2.getCycleQualAverage(cycle);
                        w.write(String.format("%.4f\t%.4f",aq,end2.getCycleQualStdDev(cycle)));
                        recordProblem(aq,cycle, problems,col+".End2");
                    }
                }
            }
            w.write('\n');
        }
        // close explicitly so that a failed flush is reported through the catch below
        w.close();

        // ---- list of low-quality runs per key and read end ----
        if ( HTML ) out.println("<hr>");
        if ( HTML ) out.println("<br>");
        out.println("\nOUTCOME (threshold at Q="+QTHRESHOLD+"):");
        if ( HTML ) out.println("<br>");
        for ( String col : columns ) {
            out.print(" "+col+" End1:");
            printProblemRuns(problems.get(col+".End1"));
            out.println();
            if ( HTML ) out.println("<br>");

            out.print(" "+col+" End2:");
            printProblemRuns(problems.get(col+".End2"));
            out.println();
            if ( HTML ) out.println("<br>");
        }
    } catch (IOException ioe) {
        throw new UserException.CouldNotCreateOutputFile(f, "Failed to write report", ioe);
    } finally {
        // release the file handle when a write failed part-way; closing twice is harmless
        if ( w != null ) {
            try {
                w.close();
            } catch (IOException ignored) {
                // the primary failure, if any, has already been reported above
            }
        }
    }
}

/** Finishes a read-count line with either the fixed read length or a variable-length warning. */
private void printReadLengthSummary(int minL, int maxL) {
    if ( minL == maxL ) out.println("; read length = "+minL);
    else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
    if ( HTML ) out.println("<br>");
}

/**
 * Prints " GOOD" when there are no low-quality runs, otherwise each run as 1-based "start-end",
 * with "END" standing for a run that was still open at the last cycle.
 */
private void printProblemRuns(List<PrimitivePair.Int> runs) {
    if ( runs == null ) {
        out.print(" GOOD");
        return;
    }
    for ( PrimitivePair.Int p : runs ) {
        out.print(" "+(p.first+1)+"-");
        // both bounds are stored 0-based, so both are shifted by one for display
        if ( p.second >= 0 ) out.print((p.second+1));
        else out.print("END");
    }
}
/**
 * Tracks runs of consecutive cycles whose average quality is below QTHRESHOLD.
 *
 * Runs are stored per name as (first, second) pairs of 0-based cycle indices; a run that is
 * still in progress has second == -1. A low-quality cycle opens a new run unless one is already
 * open; any other cycle closes the open run, if there is one, at the previous cycle.
 *
 * @param q        average quality observed at this cycle
 * @param cycle    0-based cycle index
 * @param problems runs recorded so far, keyed by name; updated in place
 * @param name     key under which runs for this column/read end are stored
 */
private void recordProblem(double q, int cycle, Map<String,List<PrimitivePair.Int>> problems, String name) {
    final boolean lowQuality = q < QTHRESHOLD;
    final List<PrimitivePair.Int> runs = problems.get(name);

    if ( runs == null ) {
        // nothing recorded for this name yet: only a low-quality cycle creates an entry
        if ( lowQuality ) {
            List<PrimitivePair.Int> created = new ArrayList<PrimitivePair.Int>();
            created.add(new PrimitivePair.Int(cycle,-1));
            problems.put(name,created);
        }
        return;
    }

    final PrimitivePair.Int latest = runs.get(runs.size()-1);
    if ( lowQuality ) {
        // extend the open run implicitly, or start a fresh one after a closed run
        if ( latest.second != -1 ) {
            runs.add(new PrimitivePair.Int(cycle,-1));
        }
    } else if ( latest.second == -1 ) {
        // the open run ended at the previous cycle
        latest.second = cycle - 1;
    }
}
/**
 * Running per-cycle quality statistics (mean and standard deviation) over a set of reads,
 * updated one read at a time without storing the individual values.
 */
static class CycleStats {
    private long readCount = 0;
    // number of quality values seen at each cycle; smaller than readCount for cycles that lie
    // beyond the end of shorter reads
    private final long[] cycleCounts;
    private final double[] cycleQualsAv;  // running mean per cycle
    private final double[] cycleQualsSd;  // running sum of squared deviations from the mean per cycle
    private int minL = 1000000000; // read min. length
    private int maxL = 0; // read max. length

    /**
     * @param N maximum supported read length (number of cycles tracked)
     */
    public CycleStats(int N) {
        cycleCounts = new long[N];
        cycleQualsAv = new double[N];
        cycleQualsSd = new double[N];
    }

    /**
     * Adds one read's qualities, given in cycle order.
     *
     * @param quals quality value per cycle
     * @throws UserException if the read is longer than the maximum length given at construction
     */
    public void add(byte[] quals) {
        if ( quals.length > cycleQualsAv.length )
            throw new UserException("A read of length "+quals.length+" encountered, which exceeds specified maximum read length");
        if ( quals.length > maxL ) maxL = quals.length;
        if ( quals.length < minL ) minL = quals.length;
        readCount++;
        for ( int i = 0 ; i < quals.length ; i++ ) {
            // Incremental mean/variance update. The divisor must be the number of values seen at
            // THIS cycle: with reads of different lengths the total read count overstates it for
            // late cycles. No special case is needed for the first value: the arrays start at 0,
            // so the mean becomes quals[i] and the squared-deviation sum stays 0.
            long n = ++cycleCounts[i];
            double oldAvg = cycleQualsAv[i]; // save old mean, will need it for calculation of the variance
            cycleQualsAv[i] += ( quals[i] - oldAvg ) / n; // update mean
            cycleQualsSd[i] += ( quals[i] - oldAvg ) * ( quals[i] - cycleQualsAv[i] );
        }
    }

    /** @return number of reads added so far */
    public long getReadCount() { return readCount; }

    /** @return length of the longest read added, or 0 if none */
    public int getMaxReadLength() { return maxL; }

    /** @return length of the shortest read added, or 1000000000 if none */
    public int getMinReadLength() { return minL; }

    /** @return mean quality at 0-based cycle {@code i} */
    double getCycleQualAverage(int i) { return cycleQualsAv[i]; }

    /**
     * @return sample standard deviation of the quality at 0-based cycle {@code i};
     *         NaN when only a single value has been seen at that cycle
     */
    double getCycleQualStdDev(int i) { return Math.sqrt( cycleQualsSd[i]/(cycleCounts[i]-1) ); }
}
}

View File

@ -24,7 +24,9 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -32,10 +34,13 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
/**
* a walker that simply throws errors. Allows us to test that the engine is behaving as expected with error handling
*/
@Hidden
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class ErrorThrowing extends RodWalker<Integer,Integer> implements TreeReducible<Integer> {
@Input(fullName="exception", shortName = "E", doc="Java class of exception to throw", required=true)
public String exceptionToThrow;

View File

@ -27,17 +27,19 @@ package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequence;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
/**
* Prints out counts of the number of reference ordered data objects encountered.
* Quality control for the reference fasta
*
*
* <h2>Input</h2>
@ -54,10 +56,11 @@ import java.io.PrintStream;
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T QCRefWalker
* -T QCRef
* </pre>
*
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class QCRef extends RefWalker<Integer, Integer> {
@Output
public PrintStream out;

View File

@ -27,6 +27,7 @@ import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -51,6 +53,7 @@ import java.util.Arrays;
* Walks over the input reads, printing out statistics about the read length, number of clipping events, and length
* of the clipping to the output stream.
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS})
public class ReadClippingStats extends ReadWalker<ReadClippingStats.ReadClippingInfo,Integer> {
@Output

View File

@ -1,125 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * Sanity-check walker over reads. Each read is compared with the read seen immediately before it:
 * an error is logged when it lies on the same reference sequence but starts before the previous
 * read, and a warning is logged when the reference sequence changes.
 * @author aaron
 */
public class ReadValidation extends ReadWalker<SAMRecord, SAMRecord> {

    // MD5 digest obtained in initialize(); no method of this class uses it afterwards
    private MessageDigest m;

    // not used by any method of this class
    private final List<String> list = new ArrayList<String>();

    /**
     * Obtains the MD5 digest instance.
     *
     * @throws ReviewedStingException if the MD5 algorithm is not available
     */
    public void initialize() {
        try {
            m = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            throw new ReviewedStingException("Unable to get the MD5 algorithm.  Get a more eXtreme version of JAVA!@!@!!");
        }
    }

    /**
     * The reads filter function; accepts every read.
     *
     * @param ref  the reference bases that correspond to our read, if a reference was provided
     * @param read the read itself
     * @return always true
     */
    public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
        return true;
    }

    /**
     * The reads map function; passes the read through unchanged.
     *
     * @param ref  the reference bases that correspond to our read, if a reference was provided
     * @param read the read itself
     * @param metaDataTracker metadata tracker (not used)
     * @return the read itself
     */
    public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
        return read;
    }

    /**
     * Initial reduce value: there is no previous read yet.
     *
     * @return null
     */
    public SAMRecord reduceInit() {
        return null;
    }

    /**
     * Compares the current read with the previous one and logs any ordering problem.
     *
     * @param read   the current read
     * @param output the previous read, or null for the first read of the traversal
     * @return the current read, which becomes the "previous" read of the next call
     */
    public SAMRecord reduce( SAMRecord read, SAMRecord output ) {
        if (output == null)
            return read;

        // Compare reference indices by value: they are boxed Integers in the samtools API, and
        // ==/!= on boxed values tests object identity, which fails for values outside the Integer cache.
        final boolean sameReference = sameIndex(read.getReferenceIndex(), output.getReferenceIndex());

        if ( sameReference && read.getAlignmentStart() < output.getAlignmentStart() ) {
            logger.error("saw the read " + read.getReadName() + " duplicated, old alignment = " + output.getAlignmentStart());
        }
        else if ( !sameReference ) {
            logger.warn("Switching Chromo");
        }
        return read;
    }

    /** Null-safe value comparison of two reference indices. */
    private static boolean sameIndex(Integer a, Integer b) {
        return ( a == null ) ? ( b == null ) : a.equals(b);
    }

    /**
     * Closes the given writer if it is not null.
     * NOTE(review): the reduce type of this walker is SAMRecord, so this overload taking a
     * SAMFileWriter does not look like the traversal-done callback - confirm whether anything calls it.
     *
     * @param output writer to close; may be null
     */
    public void onTraversalDone( SAMFileWriter output ) {
        if (output != null) {
            output.close();
        }
    }
}

View File

@ -24,10 +24,8 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
@ -37,6 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
@ -49,6 +48,7 @@ import java.util.List;
/**
* a walker for validating (in the style of validating pile-up) the ROD system.
*/
@Hidden
@Reference(window=@Window(start=-40,stop=40))
public class RodSystemValidation extends RodWalker<Integer,Integer> {

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -40,6 +41,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream;
@ -48,8 +50,9 @@ import java.util.Arrays;
/**
* At every locus in the input set, compares the pileup data (reference base, aligned base from
* each overlapping read, and quality score) to the reference pileup data generated by samtools. Samtools' pileup data
* should be specified using the command-line argument '-B pileup,SAMPileup,<your sam pileup file>'.
* should be specified using the command-line argument '-pileup:SAMPileup <your sam pileup file>'.
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires(value={DataSource.READS,DataSource.REFERENCE})
public class ValidatingPileup extends LocusWalker<Integer, ValidationStats> implements TreeReducible<ValidationStats> {
@Input(fullName = "pileup", doc="The SAMPileup containing the expected output", required = true)

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -45,6 +47,7 @@ import java.util.Set;
/**
* Filters a lifted-over VCF file for ref bases that have been changed.
*/
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=0,stop=100))
public class FilterLiftedVariants extends RodWalker<Integer, Integer> {

View File

@ -31,6 +31,7 @@ import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -39,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -51,6 +53,7 @@ import java.util.*;
/**
* Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted.
*/
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
public class LiftoverVariants extends RodWalker<Integer, Integer> {
@ArgumentCollection

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -38,6 +39,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
@ -48,6 +50,7 @@ import java.util.*;
/**
* Takes a VCF file, randomly splits variants into two different sets, and outputs 2 new VCFs with the results.
*/
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
@ArgumentCollection

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.apache.commons.io.FilenameUtils;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.text.ListFileUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -98,6 +100,7 @@ import java.util.*;
* </pre>
*/
@SuppressWarnings("unused")
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -13,6 +14,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -27,6 +29,7 @@ import java.util.*;
* work efficiently on large VCFs (or at least give a progress bar). This
* produces a binary ped file in individual major mode.
*/
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();