2009-03-16 06:21:48 +08:00
package org.broadinstitute.sting.gatk ;
2009-02-27 05:50:29 +08:00
2009-03-11 03:34:00 +08:00
import net.sf.samtools.SAMFileReader.ValidationStringency ;
2009-02-27 05:50:29 +08:00
import edu.mit.broad.picard.cmdline.CommandLineProgram ;
import edu.mit.broad.picard.cmdline.Usage ;
import edu.mit.broad.picard.cmdline.Option ;
2009-03-16 06:21:48 +08:00
import org.broadinstitute.sting.gatk.walkers.* ;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData ;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP ;
import org.broadinstitute.sting.gatk.refdata.rodGFF ;
2009-02-27 05:50:29 +08:00
import java.io.* ;
import java.util.HashMap ;
2009-03-16 06:21:48 +08:00
public class GenomeAnalysisTK extends CommandLineProgram {
2009-02-27 05:50:29 +08:00
// Usage and parameters
@Usage ( programVersion = "0.1" ) public String USAGE = "SAM Validator\n" ;
@Option ( shortName = "I" , doc = "SAM or BAM file for validation" ) public File INPUT_FILE ;
@Option ( shortName = "M" , doc = "Maximum number of reads to process before exiting" , optional = true ) public String MAX_READS_ARG = "-1" ;
@Option ( shortName = "S" , doc = "How strict should we be with validation" , optional = true ) public String STRICTNESS_ARG = "strict" ;
@Option ( shortName = "R" , doc = "Reference sequence file" , optional = true ) public File REF_FILE_ARG = null ;
@Option ( shortName = "B" , doc = "Debugging output" , optional = true ) public String DEBUGGING_STR = null ;
@Option ( shortName = "L" , doc = "Genome region to operation on: from chr:start-end" , optional = true ) public String REGION_STR = null ;
2009-02-27 06:15:41 +08:00
@Option ( shortName = "T" , doc = "Type of analysis to run" ) public String Analysis_Name = null ;
2009-03-02 02:27:32 +08:00
@Option ( shortName = "DBSNP" , doc = "DBSNP file" , optional = true ) public String DBSNP_FILE = null ;
2009-03-13 22:50:45 +08:00
@Option ( shortName = "THREADED_IO" , doc = "If true, enables threaded I/O operations" , optional = true ) public String ENABLED_THREADED_IO = "false" ;
2009-03-17 07:22:04 +08:00
@Option ( shortName = "U" , doc = "If true, enables unsafe operations, nothing will be checked at runtime. You better know what you are doing if you set this flag." , optional = false ) public String UNSAFE = "false" ;
2009-03-18 04:29:09 +08:00
@Option ( shortName = "SORT_ON_FLY" , doc = "If true, enables on fly sorting of reads file." , optional = false ) public String ENABLED_SORT_ON_FLY = "false" ;
2009-03-13 22:50:45 +08:00
2009-02-27 05:50:29 +08:00
public static HashMap < String , Object > MODULES = new HashMap < String , Object > ( ) ;
public static void addModule ( final String name , final Object walker ) {
System . out . printf ( "* Adding module %s%n" , name ) ;
MODULES . put ( name , walker ) ;
}
static {
2009-03-02 07:31:23 +08:00
addModule ( "CountLoci" , new CountLociWalker ( ) ) ;
2009-02-27 06:15:41 +08:00
addModule ( "Pileup" , new PileupWalker ( ) ) ;
2009-03-02 07:31:23 +08:00
addModule ( "CountReads" , new CountReadsWalker ( ) ) ;
2009-03-03 02:18:48 +08:00
addModule ( "PrintReads" , new PrintReadsWalker ( ) ) ;
2009-02-27 06:15:41 +08:00
addModule ( "Base_Quality_Histogram" , new BaseQualityHistoWalker ( ) ) ;
2009-03-17 22:08:54 +08:00
addModule ( "Aligned_Reads_Histogram" , new AlignedReadsHistoWalker ( ) ) ;
2009-03-13 12:10:43 +08:00
addModule ( "AlleleFrequency" , new AlleleFrequencyWalker ( ) ) ;
2009-03-12 05:43:31 +08:00
addModule ( "SingleSampleGenotyper" , new SingleSampleGenotyper ( ) ) ;
addModule ( "Null" , new NullWalker ( ) ) ;
2009-03-13 07:30:19 +08:00
addModule ( "DepthOfCoverage" , new DepthOfCoverageWalker ( ) ) ;
2009-03-16 22:46:19 +08:00
addModule ( "CountMismatches" , new MismatchCounterWalker ( ) ) ;
2009-02-27 05:50:29 +08:00
}
private TraversalEngine engine = null ;
public boolean DEBUGGING = false ;
/** Required main method implementation. */
public static void main ( String [ ] argv ) {
2009-03-16 06:21:48 +08:00
System . exit ( new GenomeAnalysisTK ( ) . instanceMain ( argv ) ) ;
2009-02-27 05:50:29 +08:00
}
protected int doWork ( ) {
2009-03-01 04:47:48 +08:00
final boolean TEST_ROD = false ;
ReferenceOrderedData [ ] rods = null ;
2009-02-28 01:07:57 +08:00
2009-03-01 04:47:48 +08:00
if ( TEST_ROD ) {
ReferenceOrderedData gff = new ReferenceOrderedData ( new File ( "trunk/data/gFFTest.gff" ) , rodGFF . class ) ;
gff . testMe ( ) ;
//ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("trunk/data/dbSNP_head.txt"), rodDbSNP.class );
ReferenceOrderedData dbsnp = new ReferenceOrderedData ( new File ( "/Volumes/Users/mdepristo/broad/ATK/exampleSAMs/dbSNP_chr20.txt" ) , rodDbSNP . class ) ;
//dbsnp.testMe();
rods = new ReferenceOrderedData [ ] { dbsnp } ; // { gff, dbsnp };
}
2009-03-02 02:27:32 +08:00
else if ( DBSNP_FILE ! = null ) {
ReferenceOrderedData dbsnp = new ReferenceOrderedData ( new File ( DBSNP_FILE ) , rodDbSNP . class ) ;
//dbsnp.testMe();
rods = new ReferenceOrderedData [ ] { dbsnp } ; // { gff, dbsnp };
}
2009-03-01 04:47:48 +08:00
else {
rods = new ReferenceOrderedData [ ] { } ; // { gff, dbsnp };
}
this . engine = new TraversalEngine ( INPUT_FILE , REF_FILE_ARG , rods ) ;
2009-03-14 00:00:23 +08:00
2009-02-27 06:15:41 +08:00
ValidationStringency strictness ;
2009-02-27 05:50:29 +08:00
if ( STRICTNESS_ARG = = null ) {
strictness = ValidationStringency . STRICT ;
}
else if ( STRICTNESS_ARG . toLowerCase ( ) . equals ( "lenient" ) ) {
strictness = ValidationStringency . LENIENT ;
}
else if ( STRICTNESS_ARG . toLowerCase ( ) . equals ( "silent" ) ) {
strictness = ValidationStringency . SILENT ;
}
else {
strictness = ValidationStringency . STRICT ;
}
System . err . println ( "Strictness is " + strictness ) ;
engine . setStrictness ( strictness ) ;
engine . setDebugging ( ! ( DEBUGGING_STR = = null | | DEBUGGING_STR . toLowerCase ( ) . equals ( "true" ) ) ) ;
engine . setMaxReads ( Integer . parseInt ( MAX_READS_ARG ) ) ;
2009-03-03 02:18:48 +08:00
if ( REGION_STR ! = null ) {
engine . setLocation ( REGION_STR ) ;
}
2009-03-17 07:22:04 +08:00
engine . setSafetyChecking ( ! UNSAFE . toLowerCase ( ) . equals ( "true" ) ) ;
2009-03-18 06:20:03 +08:00
engine . setSortOnFly ( ENABLED_SORT_ON_FLY . toLowerCase ( ) . equals ( "true" ) ) ;
2009-03-17 07:22:04 +08:00
2009-03-16 06:21:48 +08:00
engine . initialize ( ENABLED_THREADED_IO . toLowerCase ( ) . equals ( "true" ) ) ;
//engine.testReference();
2009-02-27 06:15:41 +08:00
//LocusWalker<Integer,Integer> walker = new PileupWalker();
2009-03-04 08:15:35 +08:00
// Try to get the module specified
Object my_module ;
if ( MODULES . containsKey ( Analysis_Name ) ) {
my_module = MODULES . get ( Analysis_Name ) ;
2009-03-13 22:50:45 +08:00
} else {
2009-03-04 08:15:35 +08:00
System . out . println ( "Could not find module " + Analysis_Name ) ;
return 0 ;
}
2009-02-27 06:15:41 +08:00
try {
2009-03-04 08:15:35 +08:00
LocusWalker < ? , ? > walker = ( LocusWalker < ? , ? > ) my_module ;
2009-02-27 06:15:41 +08:00
engine . traverseByLoci ( walker ) ;
}
catch ( java . lang . ClassCastException e ) {
// I guess we're a read walker LOL
2009-03-04 08:15:35 +08:00
ReadWalker < ? , ? > walker = ( ReadWalker < ? , ? > ) my_module ;
2009-02-27 06:15:41 +08:00
engine . traverseByRead ( walker ) ;
}
2009-02-27 05:50:29 +08:00
return 0 ;
}
2009-03-04 08:15:35 +08:00
}