package org.broadinstitute.sting.gatk;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import edu.mit.broad.picard.reference.ReferenceSequence;
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;

import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.RuntimeIOException;

import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.log4j.Logger;

import org.broadinstitute.sting.gatk.executive.MicroManager;
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.rodGFF;
import org.broadinstitute.sting.gatk.refdata.rodSAMPileup;
import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.FastaSequenceFile2;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
2009-03-16 06:21:48 +08:00
public class GenomeAnalysisTK extends CommandLineProgram {
2009-03-27 04:45:27 +08:00
public static GenomeAnalysisTK Instance = null ;
2009-03-23 05:06:22 +08:00
// parameters and their defaults
2009-04-03 21:23:18 +08:00
public File INPUT_FILE = null ;
2009-03-23 05:06:22 +08:00
public String MAX_READS_ARG = "-1" ;
public String STRICTNESS_ARG = "strict" ;
public File REF_FILE_ARG = null ;
public String DEBUGGING_STR = null ;
public String REGION_STR = null ;
public String Analysis_Name = null ;
public String DBSNP_FILE = null ;
2009-03-24 11:58:03 +08:00
public String HAPMAP_FILE = null ;
2009-04-04 01:32:31 +08:00
public String HAPMAP_CHIP_FILE = null ;
2009-03-23 05:06:22 +08:00
public Boolean ENABLED_THREADED_IO = false ;
public Boolean UNSAFE = false ;
2009-04-01 10:11:13 +08:00
public String MAX_ON_FLY_SORTS = null ;
public String DOWNSAMPLE_FRACTION = null ;
public String DOWNSAMPLE_COVERAGE = null ;
2009-03-23 05:06:22 +08:00
public String INTERVALS_FILE = null ;
2009-04-08 06:33:26 +08:00
// added for mendelian walker.
//TODO: when walkers can ask for their tracks this should be removed!
public String MOTHER_GENOTYPE_FILE = null ;
public String FATHER_GENOTYPE_FILE = null ;
public String DAUGHTER_GENOTYPE_FILE = null ;
2009-03-23 05:06:22 +08:00
// our walker manager
2009-03-20 06:12:25 +08:00
private WalkerManager walkerManager = null ;
2009-03-23 05:06:22 +08:00
public String pluginPathName = null ;
2009-02-27 05:50:29 +08:00
private TraversalEngine engine = null ;
public boolean DEBUGGING = false ;
2009-04-03 06:35:30 +08:00
public Boolean WALK_ALL_LOCI = false ;
2009-04-10 04:28:17 +08:00
public Boolean ENABLE_THREADING = false ;
2009-03-23 05:06:22 +08:00
2009-03-27 23:03:32 +08:00
/ * *
* An output file presented to the walker .
* /
public String outFileName = null ;
/ * *
* An error output file presented to the walker .
* /
public String errFileName = null ;
/ * *
* A joint file for both ' normal ' and error output presented to the walker .
* /
public String outErrFileName = null ;
2009-04-10 04:28:17 +08:00
/ * *
* How many threads should be allocated to this analysis .
* /
public int numThreads = 1 ;
2009-03-27 23:03:32 +08:00
/ * *
* The output stream , initialized from OUTFILENAME / OUTERRFILENAME .
* Used by the walker .
* /
public PrintStream out = System . out ;
/ * *
* The output stream , initialized from ERRFILENAME / OUTERRFILENAME .
* Used by the walker .
* /
public PrintStream err = System . err ;
2009-04-03 03:55:19 +08:00
2009-03-23 05:06:22 +08:00
/ * *
* our log , which we want to capture anything from this class
* /
private static Logger logger = Logger . getLogger ( GenomeAnalysisTK . class ) ;
2009-04-08 09:52:13 +08:00
public static ArrayList < String > ROD_BINDINGS = null ;
2009-03-23 05:06:22 +08:00
/ * *
* setup our arguments , both required and optional
* < p / >
* Flags don ' t take an argument , the associated Boolean gets set to true if the flag appears on the command line .
* /
protected void setupArgs ( ) {
2009-04-03 21:23:18 +08:00
m_parser . addOptionalArg ( "input_file" , "I" , "SAM or BAM file" , "INPUT_FILE" ) ;
//m_parser.addRequiredArg("input_file", "I", "SAM or BAM file", "INPUT_FILE");
2009-03-23 05:06:22 +08:00
m_parser . addOptionalArg ( "maximum_reads" , "M" , "Maximum number of reads to process before exiting" , "MAX_READS_ARG" ) ;
2009-04-03 00:11:20 +08:00
m_parser . addOptionalArg ( "validation_strictness" , "S" , "How strict should we be with validation (lenient|silent|strict)" , "STRICTNESS_ARG" ) ;
2009-03-23 05:06:22 +08:00
m_parser . addOptionalArg ( "reference_sequence" , "R" , "Reference sequence file" , "REF_FILE_ARG" ) ;
m_parser . addOptionalArg ( "genome_region" , "L" , "Genome region to operation on: from chr:start-end" , "REGION_STR" ) ;
2009-03-25 08:12:00 +08:00
m_parser . addRequiredArg ( "analysis_type" , "T" , "Type of analysis to run" , "Analysis_Name" ) ;
2009-03-23 05:06:22 +08:00
m_parser . addOptionalArg ( "DBSNP" , "D" , "DBSNP file" , "DBSNP_FILE" ) ;
2009-04-03 03:55:19 +08:00
m_parser . addOptionalArg ( "hapmap" , "H" , "Hapmap file" , "HAPMAP_FILE" ) ;
2009-04-04 01:32:31 +08:00
m_parser . addOptionalArg ( "hapmap_chip" , "hc" , "Hapmap chip file" , "HAPMAP_CHIP_FILE" ) ;
2009-03-23 23:01:32 +08:00
m_parser . addOptionalFlag ( "threaded_IO" , "P" , "If set, enables threaded I/O operations" , "ENABLED_THREADED_IO" ) ;
m_parser . addOptionalFlag ( "unsafe" , "U" , "If set, enables unsafe operations, nothing will be checked at runtime." , "UNSAFE" ) ;
2009-04-01 10:11:13 +08:00
m_parser . addOptionalArg ( "sort_on_the_fly" , "sort" , "Maximum number of reads to sort on the fly" , "MAX_ON_FLY_SORTS" ) ;
2009-04-02 04:27:06 +08:00
m_parser . addOptionalArg ( "downsample_to_fraction" , "dfrac" , "Fraction [0.0-1.0] of reads to downsample to" , "DOWNSAMPLE_FRACTION" ) ;
m_parser . addOptionalArg ( "downsample_to_coverage" , "dcov" , "Coverage [integer] to downsample to" , "DOWNSAMPLE_COVERAGE" ) ;
2009-03-23 05:06:22 +08:00
m_parser . addOptionalArg ( "intervals_file" , "V" , "File containing list of genomic intervals to operate on. line := <contig> <start> <end>" , "INTERVALS_FILE" ) ;
2009-04-03 06:23:46 +08:00
m_parser . addOptionalFlag ( "all_loci" , "A" , "Should we process all loci, not just those covered by reads" , "WALK_ALL_LOCI" ) ;
2009-03-27 23:03:32 +08:00
m_parser . addOptionalArg ( "out" , "o" , "An output file presented to the walker. Will overwrite contents if file exists." , "outFileName" ) ;
m_parser . addOptionalArg ( "err" , "e" , "An error output file presented to the walker. Will overwrite contents if file exists." , "errFileName" ) ;
2009-04-03 03:55:19 +08:00
m_parser . addOptionalArg ( "outerr" , "oe" , "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists." , "outErrFileName" ) ;
2009-04-10 04:28:17 +08:00
m_parser . addOptionalArg ( "numthreads" , "nt" , "How many threads should be allocated to running this analysis." , "numThreads" ) ;
m_parser . addOptionalFlag ( "enablethreading" , "et" , "Enable experimental threading support." , "ENABLE_THREADING" ) ;
2009-04-08 06:33:26 +08:00
//TODO: remove when walkers can ask for tracks
m_parser . addOptionalArg ( "mother" , "MOM" , "Mother's genotype (SAM pileup)" , "MOTHER_GENOTYPE_FILE" ) ;
m_parser . addOptionalArg ( "father" , "DAD" , "Father's genotype (SAM pileup)" , "FATHER_GENOTYPE_FILE" ) ;
m_parser . addOptionalArg ( "daughter" , "KID" , "Daughter's genotype (SAM pileup)" , "DAUGHTER_GENOTYPE_FILE" ) ;
2009-04-08 09:52:13 +08:00
// --rodBind <name> <type> <file>
Option rodBinder = OptionBuilder . withArgName ( "rodBind" )
. hasArgs ( )
. withDescription ( "Bind rod with <name> and <type> to <file>" )
. create ( "B" ) ;
2009-04-10 04:28:17 +08:00
m_parser . addOptionalArg ( rodBinder , "ROD_BINDINGS" ) ;
2009-03-23 05:06:22 +08:00
}
2009-03-25 08:12:00 +08:00
/ * *
* GATK can add arguments dynamically based on analysis type .
* @return true
* /
@Override
protected boolean canAddArgumentsDynamically ( ) { return true ; }
/ * *
* GATK provides the walker as an argument source . As a side - effect , initializes the walker variable .
* @return List of walkers to load dynamically .
* /
@Override
2009-03-27 04:45:27 +08:00
protected Class [ ] getArgumentSources ( ) {
2009-03-25 08:12:00 +08:00
if ( Analysis_Name = = null )
throw new IllegalArgumentException ( "Must provide analysis name" ) ;
walkerManager = new WalkerManager ( pluginPathName ) ;
if ( ! walkerManager . doesWalkerExist ( Analysis_Name ) )
throw new IllegalArgumentException ( "Invalid analysis name" ) ;
2009-03-27 04:45:27 +08:00
return new Class [ ] { walkerManager . getWalkerClassByName ( Analysis_Name ) } ;
}
2009-03-25 08:12:00 +08:00
2009-03-27 04:45:27 +08:00
@Override
protected String getArgumentSourceName ( Class argumentSource ) {
2009-04-03 03:55:19 +08:00
return WalkerManager . getWalkerName ( ( Class < Walker > ) argumentSource ) ;
2009-03-25 08:12:00 +08:00
}
2009-03-23 05:06:22 +08:00
/ * *
* Required main method implementation .
* /
2009-02-27 05:50:29 +08:00
public static void main ( String [ ] argv ) {
2009-03-27 04:45:27 +08:00
Instance = new GenomeAnalysisTK ( ) ;
start ( Instance , argv ) ;
2009-02-27 05:50:29 +08:00
}
2009-03-23 05:06:22 +08:00
protected int execute ( ) {
2009-03-01 04:47:48 +08:00
final boolean TEST_ROD = false ;
2009-04-03 04:53:01 +08:00
List < ReferenceOrderedData < ? extends ReferenceOrderedDatum > > rods = new ArrayList < ReferenceOrderedData < ? extends ReferenceOrderedDatum > > ( ) ;
2009-04-03 03:55:19 +08:00
2009-04-08 09:52:13 +08:00
if ( ROD_BINDINGS ! = null ) {
System . out . printf ( "ROD BINDINGS are %s%n" , Utils . join ( ":" , ROD_BINDINGS ) ) ;
}
2009-04-03 03:55:19 +08:00
if ( TEST_ROD ) {
2009-04-04 00:41:33 +08:00
ReferenceOrderedData < rodGFF > gff = new ReferenceOrderedData < rodGFF > ( "test" , new File ( "trunk/data/gFFTest.gff" ) , rodGFF . class ) ;
2009-03-01 04:47:48 +08:00
gff . testMe ( ) ;
//ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("trunk/data/dbSNP_head.txt"), rodDbSNP.class );
2009-04-04 00:41:33 +08:00
ReferenceOrderedData < rodDbSNP > dbsnp = new ReferenceOrderedData < rodDbSNP > ( "dbSNP" , new File ( "/Volumes/Users/mdepristo/broad/ATK/exampleSAMs/dbSNP_chr20.txt" ) , rodDbSNP . class ) ;
2009-03-01 04:47:48 +08:00
//dbsnp.testMe();
2009-03-24 11:58:03 +08:00
rods . add ( dbsnp ) ; // { gff, dbsnp };
2009-04-03 03:55:19 +08:00
} else {
if ( DBSNP_FILE ! = null ) {
2009-04-04 00:41:33 +08:00
ReferenceOrderedData < rodDbSNP > dbsnp = new ReferenceOrderedData < rodDbSNP > ( "dbSNP" , new File ( DBSNP_FILE ) , rodDbSNP . class ) ;
2009-04-03 03:55:19 +08:00
//dbsnp.testMe();
rods . add ( dbsnp ) ; // { gff, dbsnp };
}
if ( HAPMAP_FILE ! = null ) {
2009-04-04 00:41:33 +08:00
ReferenceOrderedData < HapMapAlleleFrequenciesROD > hapmap = new ReferenceOrderedData < HapMapAlleleFrequenciesROD > ( "hapmap" , new File ( HAPMAP_FILE ) , HapMapAlleleFrequenciesROD . class ) ;
2009-04-03 03:55:19 +08:00
//dbsnp.testMe();
rods . add ( hapmap ) ; // { gff, dbsnp };
}
2009-04-04 01:32:31 +08:00
if ( HAPMAP_CHIP_FILE ! = null ) {
2009-04-04 01:41:58 +08:00
ReferenceOrderedData < rodGFF > hapmapChip = new ReferenceOrderedData < rodGFF > ( "hapmap-chip" , new File ( HAPMAP_CHIP_FILE ) , rodGFF . class ) ;
2009-04-04 01:32:31 +08:00
rods . add ( hapmapChip ) ;
}
2009-04-08 06:33:26 +08:00
//TODO: remove when walkers can ask for tracks
if ( MOTHER_GENOTYPE_FILE ! = null )
rods . add ( new ReferenceOrderedData < rodSAMPileup > ( "mother" , new File ( MOTHER_GENOTYPE_FILE ) , rodSAMPileup . class ) ) ;
if ( FATHER_GENOTYPE_FILE ! = null )
rods . add ( new ReferenceOrderedData < rodSAMPileup > ( "father" , new File ( FATHER_GENOTYPE_FILE ) , rodSAMPileup . class ) ) ;
if ( DAUGHTER_GENOTYPE_FILE ! = null )
rods . add ( new ReferenceOrderedData < rodSAMPileup > ( "daughter" , new File ( DAUGHTER_GENOTYPE_FILE ) , rodSAMPileup . class ) ) ;
}
2009-03-01 04:47:48 +08:00
2009-03-27 23:03:32 +08:00
initializeOutputStreams ( ) ;
2009-04-03 04:53:01 +08:00
Walker < ? , ? > my_walker = null ;
2009-03-27 23:40:45 +08:00
try {
my_walker = walkerManager . createWalkerByName ( Analysis_Name ) ;
}
catch ( InstantiationException ex ) {
throw new RuntimeException ( "Unable to instantiate walker." , ex ) ;
}
catch ( IllegalAccessException ex ) {
throw new RuntimeException ( "Unable to access walker" , ex ) ;
}
2009-04-10 04:28:17 +08:00
MicroManager microManager = null ;
2009-03-27 23:40:45 +08:00
// Try to get the walker specified
try {
LocusWalker < ? , ? > walker = ( LocusWalker < ? , ? > ) my_walker ;
2009-04-03 21:23:18 +08:00
if ( INPUT_FILE = = null ) {
if ( walker . requiresReads ( ) )
Utils . scareUser ( String . format ( "Analysis %s requires reads, but none were given" , Analysis_Name ) ) ;
this . engine = new TraverseByReference ( null , REF_FILE_ARG , rods ) ;
} else {
if ( walker . cannotHandleReads ( ) )
Utils . scareUser ( String . format ( "Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided" , Analysis_Name , INPUT_FILE ) ) ;
2009-04-10 04:28:17 +08:00
if ( WALK_ALL_LOCI ) {
// TODO: Temporary debugging code. Activate the new debugging code only when the MicroManager
// is not filtered.
if ( ENABLE_THREADING & & REGION_STR = = null ) {
logger . warn ( "Preliminary threading support enabled" ) ;
microManager = new MicroManager ( INPUT_FILE , REF_FILE_ARG , numThreads ) ;
this . engine = microManager . getTraversalEngine ( ) ;
}
else {
this . engine = new TraverseByLociByReference ( INPUT_FILE , REF_FILE_ARG , rods ) ;
}
}
2009-04-03 21:23:18 +08:00
else
2009-04-08 06:33:26 +08:00
this . engine = new TraverseByLoci ( INPUT_FILE , REF_FILE_ARG , rods ) ;
2009-04-03 21:23:18 +08:00
}
2009-03-27 23:40:45 +08:00
}
catch ( java . lang . ClassCastException e ) {
// I guess we're a read walker LOL
ReadWalker < ? , ? > walker = ( ReadWalker < ? , ? > ) my_walker ;
this . engine = new TraverseByReads ( INPUT_FILE , REF_FILE_ARG , rods ) ;
}
2009-03-14 00:00:23 +08:00
2009-03-22 20:04:11 +08:00
// Prepare the sort ordering w.r.t. the sequence dictionary
2009-03-23 05:06:22 +08:00
if ( REF_FILE_ARG ! = null ) {
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory . getReferenceSequenceFile ( REF_FILE_ARG ) ;
2009-03-29 04:37:27 +08:00
GenomeLoc . setupRefContigOrdering ( refFile ) ;
2009-03-23 05:06:22 +08:00
}
2009-04-10 04:28:17 +08:00
// Determine the validation stringency. Default to ValidationStringency.STRICT.
2009-02-27 06:15:41 +08:00
ValidationStringency strictness ;
2009-04-10 04:28:17 +08:00
try {
strictness = Enum . valueOf ( ValidationStringency . class , STRICTNESS_ARG ) ;
}
catch ( IllegalArgumentException ex ) {
2009-02-27 05:50:29 +08:00
strictness = ValidationStringency . STRICT ;
2009-03-23 05:06:22 +08:00
}
logger . info ( "Strictness is " + strictness ) ;
2009-02-27 05:50:29 +08:00
engine . setStrictness ( strictness ) ;
2009-03-23 05:06:22 +08:00
engine . setDebugging ( ! ( DEBUGGING_STR = = null | | DEBUGGING_STR . toLowerCase ( ) . equals ( "true" ) ) ) ;
2009-02-27 05:50:29 +08:00
engine . setMaxReads ( Integer . parseInt ( MAX_READS_ARG ) ) ;
2009-03-23 05:06:22 +08:00
if ( REGION_STR ! = null ) {
2009-03-03 02:18:48 +08:00
engine . setLocation ( REGION_STR ) ;
}
2009-03-23 05:06:22 +08:00
if ( INTERVALS_FILE ! = null ) {
2009-03-22 00:07:32 +08:00
engine . setLocationFromFile ( INTERVALS_FILE ) ;
}
2009-03-29 04:37:27 +08:00
2009-04-01 10:11:13 +08:00
if ( MAX_ON_FLY_SORTS ! = null ) {
engine . setSortOnFly ( Integer . parseInt ( MAX_ON_FLY_SORTS ) ) ;
}
if ( DOWNSAMPLE_FRACTION ! = null ) {
engine . setDownsampleByFraction ( Double . parseDouble ( DOWNSAMPLE_FRACTION ) ) ;
}
if ( DOWNSAMPLE_COVERAGE ! = null ) {
2009-04-02 04:27:06 +08:00
engine . setDownsampleByCoverage ( Integer . parseInt ( DOWNSAMPLE_COVERAGE ) ) ;
2009-04-01 10:11:13 +08:00
}
2009-03-23 05:06:22 +08:00
engine . setSafetyChecking ( ! UNSAFE ) ;
2009-03-25 06:32:45 +08:00
engine . setThreadedIO ( ENABLED_THREADED_IO ) ;
2009-03-27 08:12:35 +08:00
engine . setWalkOverAllSites ( WALK_ALL_LOCI ) ;
2009-03-25 06:32:45 +08:00
engine . initialize ( ) ;
2009-03-04 08:15:35 +08:00
2009-04-10 04:28:17 +08:00
if ( microManager ! = null ) {
List < GenomeLoc > locations = GenomeLoc . parseGenomeLocs ( REGION_STR ) ;
microManager . execute ( my_walker , locations ) ;
}
else
engine . traverse ( my_walker ) ;
2009-02-27 05:50:29 +08:00
return 0 ;
}
2009-03-23 03:57:52 +08:00
2009-03-27 23:03:32 +08:00
/ * *
* Initialize the output streams as specified by the user .
* /
private void initializeOutputStreams ( ) {
if ( outErrFileName ! = null & & ( outFileName ! = null | | errFileName ! = null ) )
throw new IllegalArgumentException ( "Can't set output/error output file with either out file name or err file name" ) ;
try {
if ( outErrFileName ! = null ) {
PrintStream outErrStream = new PrintStream ( outErrFileName ) ;
out = outErrStream ;
err = outErrStream ;
}
if ( outFileName ! = null ) {
out = new PrintStream ( outFileName ) ;
}
if ( errFileName ! = null ) {
err = new PrintStream ( errFileName ) ;
}
}
catch ( FileNotFoundException ex ) {
throw new RuntimeException ( "Unable to open a walker output file." , ex ) ;
}
}
2009-03-23 03:57:52 +08:00
/ * *
* An inappropriately placed validation and performance testing routine for jumping
* around in the fasta sequence file .
* @param refFileName
* /
private static void testNewReferenceFeatures ( final File refFileName ) {
final FastaSequenceFile2 refFile = new FastaSequenceFile2 ( refFileName ) ;
2009-03-29 04:37:27 +08:00
GenomeLoc . setupRefContigOrdering ( refFile ) ;
2009-03-23 03:57:52 +08:00
List < SAMSequenceRecord > refContigs = refFile . getSequenceDictionary ( ) . getSequences ( ) ;
/ *
for ( SAMSequenceRecord refContig : refContigs ) {
System . out . printf ( " Traversing from chr1 to %s would require jumping %d bytes%n" ,
refContig . getSequenceName ( ) , refFile . getDistanceBetweenContigs ( "chr1" , refContig . getSequenceName ( ) ) ) ;
}
* /
String lastContig = null ;
List < Double > timings = new ArrayList < Double > ( ) ;
for ( SAMSequenceRecord startContig : refFile . getSequenceDictionary ( ) . getSequences ( ) ) {
final String startContigName = startContig . getSequenceName ( ) ;
for ( SAMSequenceRecord targetContig : refFile . getSequenceDictionary ( ) . getSequences ( ) ) {
refFile . seekToContig ( startContigName , true ) ;
2009-03-27 21:27:04 +08:00
logger . info ( String . format ( "Seeking: current=%s, target=%s%n" , startContigName , targetContig . getSequenceName ( ) ) ) ;
2009-03-23 03:57:52 +08:00
long lastTime = System . currentTimeMillis ( ) ;
final boolean success = refFile . seekToContig ( targetContig . getSequenceName ( ) , true ) ;
long curTime = System . currentTimeMillis ( ) ;
final double elapsed = ( curTime - lastTime ) / 1000.0 ;
timings . add ( elapsed ) ;
2009-03-27 21:27:04 +08:00
logger . info ( String . format ( " -> Elapsed time %.2f, averaging %.2f sec / seek for %d seeks%n" ,
elapsed , Utils . averageDouble ( timings ) , timings . size ( ) ) ) ;
2009-03-23 03:57:52 +08:00
if ( ! success ) {
2009-03-27 21:27:04 +08:00
logger . error ( String . format ( "Failured to seek to %s from %s%n" , targetContig . getSequenceName ( ) , lastContig ) ) ;
2009-03-23 03:57:52 +08:00
}
//System.exit(1);
}
}
System . exit ( 1 ) ;
// code for randomly sampling the seeks
// Random rnd = new Random();
// String lastContig = null;
// List<Double> timings = new ArrayList<Double>();
// final int N_SAMPLES = 1000;
// //try { refFile.seekToContig("chr3"); } catch ( IOException e ) {}
// for ( int i = 0; i < N_SAMPLES; i++ ) {
// final int nextIndex = rnd.nextInt(refContigs.size());
// String nextContig = refFile.getSequenceDictionary().getSequence(nextIndex).getSequenceName();
// //nextContig = "chr2";
// try {
// System.out.printf("Seeking: current=%s, target=%s%n", refFile.getContigName(), nextContig);
// long lastTime = System.currentTimeMillis();
// final boolean success = refFile.seekToContig(nextContig, true);
// long curTime = System.currentTimeMillis();
// final double elapsed = (curTime - lastTime) / 1000.0;
// timings.add(elapsed);
// System.out.printf(" -> Elapsed time %.2f, averaging %.2f sec / seek for %d seeks%n",
// elapsed, Utils.averageDouble(timings), timings.size());
//
// if ( ! success ) {
// System.out.printf("Failured to seek to %s from %s%n", nextContig, lastContig );
// }
// //System.exit(1);
// } catch ( IOException e ) {
// System.out.printf("Failured to seek to %s from %s%n", nextContig, lastContig );
// e.printStackTrace();
// }
//
// lastContig = nextContig;
// }
// System.exit(1);
/ *
final String targetChr = "chr10" ;
try {
refFile . seekToContig ( targetChr ) ;
} catch ( IOException e ) {
System . out . printf ( "Failured to seek to %s%n" , targetChr ) ;
2009-03-23 05:06:22 +08:00
e . printStackTrace ( ) ;
2009-03-23 03:57:52 +08:00
}
System . exit ( 1 ) ;
* /
//List<Double> timings = new ArrayList<Double>();
final long startTime = System . currentTimeMillis ( ) ;
long lastTime = System . currentTimeMillis ( ) ;
int i = 0 ;
String prevNextContigName = null ;
2009-03-27 21:27:04 +08:00
logger . info ( String . format ( "Walking reference sequence:%n" ) ) ;
2009-03-23 03:57:52 +08:00
for ( SAMSequenceRecord refContig : refContigs ) {
long curTime = System . currentTimeMillis ( ) ;
ReferenceSequence contig = refFile . nextSequence ( ) ;
final double elapsed = ( curTime - lastTime ) / 1000.0 ;
timings . add ( elapsed ) ;
2009-03-27 21:27:04 +08:00
logger . info ( String . format ( "%2d : expected %s contig, found %s with next of %s after %.2f seconds, average is %.2f%n" , i ,
refContig . getSequenceName ( ) , contig . getName ( ) , refFile . getNextContigName ( ) , elapsed , Utils . averageDouble ( timings ) ) ) ;
2009-03-23 03:57:52 +08:00
if ( prevNextContigName ! = null & & contig . getName ( ) ! = null & & ! prevNextContigName . equals ( contig . getName ( ) ) )
throw new RuntimeIOException ( String . format ( "Unexpected contig ordering %s was expected next, but I found %s?" ,
prevNextContigName , contig . getName ( ) ) ) ;
prevNextContigName = refFile . getNextContigName ( ) ;
lastTime = curTime ;
i + + ;
2009-03-27 21:27:04 +08:00
logger . info ( String . format ( " Traversing from chr1 to %s would require jumping %d bytes%n" ,
contig . getName ( ) , refFile . getDistanceBetweenContigs ( "chr1" , contig . getName ( ) ) ) ) ;
2009-03-23 03:57:52 +08:00
}
}
2009-04-03 05:38:00 +08:00
public SAMFileReader getSamReader ( ) { return this . engine . getSamReader ( ) ; }
2009-03-04 08:15:35 +08:00
}