Added support for directly reading SRA runs
This commit is contained in:
parent
6bca5a060f
commit
a465c87ff8
|
|
@ -51,9 +51,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.bqsr;
|
package org.broadinstitute.gatk.tools.walkers.bqsr;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.CigarElement;
|
import htsjdk.samtools.CigarElement;
|
||||||
import htsjdk.samtools.SAMFileHeader;
|
import htsjdk.samtools.SAMFileHeader;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.tribble.Feature;
|
import htsjdk.tribble.Feature;
|
||||||
import org.broadinstitute.gatk.engine.recalibration.*;
|
import org.broadinstitute.gatk.engine.recalibration.*;
|
||||||
import org.broadinstitute.gatk.engine.walkers.*;
|
import org.broadinstitute.gatk.engine.walkers.*;
|
||||||
|
|
@ -194,7 +194,7 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
||||||
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to mask out known variant sites. Please provide a VCF file containing known sites of genetic variation.";
|
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to mask out known variant sites. Please provide a VCF file containing known sites of genetic variation.";
|
||||||
|
|
||||||
private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector
|
private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector
|
||||||
private IndexedFastaSequenceFile referenceReader; // fasta reference reader for use with BAQ calculation
|
private ReferenceSequenceFile referenceReader; // fasta reference reader for use with BAQ calculation
|
||||||
private final static byte NO_BAQ_UNCERTAINTY = (byte)'@';
|
private final static byte NO_BAQ_UNCERTAINTY = (byte)'@';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import htsjdk.samtools.SAMFileWriter;
|
import htsjdk.samtools.SAMFileWriter;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.variant.variantcontext.*;
|
import htsjdk.variant.variantcontext.*;
|
||||||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||||
import htsjdk.variant.vcf.VCFConstants;
|
import htsjdk.variant.vcf.VCFConstants;
|
||||||
|
|
@ -487,7 +488,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
private HaplotypeCallerGenotypingEngine genotypingEngine = null;
|
private HaplotypeCallerGenotypingEngine genotypingEngine = null;
|
||||||
|
|
||||||
// fasta reference reader to supplement the edges of the reference sequence
|
// fasta reference reader to supplement the edges of the reference sequence
|
||||||
protected CachingIndexedFastaSequenceFile referenceReader;
|
protected ReferenceSequenceFile referenceReader;
|
||||||
|
|
||||||
// reference base padding size
|
// reference base padding size
|
||||||
private static final int REFERENCE_PADDING = 500;
|
private static final int REFERENCE_PADDING = 500;
|
||||||
|
|
@ -683,12 +684,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
|
|
||||||
vcfWriter.writeHeader(new VCFHeader(headerInfo, sampleSet));
|
vcfWriter.writeHeader(new VCFHeader(headerInfo, sampleSet));
|
||||||
|
|
||||||
try {
|
// fasta reference reader to supplement the edges of the reference sequence
|
||||||
// fasta reference reader to supplement the edges of the reference sequence
|
referenceReader = CachingIndexedFastaSequenceFile.checkAndCreate(getToolkit().getArguments().referenceFile);
|
||||||
referenceReader = new CachingIndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
|
|
||||||
} catch( FileNotFoundException e ) {
|
|
||||||
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
// create and setup the assembler
|
// create and setup the assembler
|
||||||
assemblyEngine = new ReadThreadingAssembler(RTAC.maxNumHaplotypesInPopulation, RTAC.kmerSizes, RTAC.dontIncreaseKmerSizesForCycles, RTAC.allowNonUniqueKmersInRef, RTAC.numPruningSamples);
|
assemblyEngine = new ReadThreadingAssembler(RTAC.maxNumHaplotypesInPopulation, RTAC.kmerSizes, RTAC.dontIncreaseKmerSizesForCycles, RTAC.allowNonUniqueKmersInRef, RTAC.numPruningSamples);
|
||||||
|
|
|
||||||
|
|
@ -51,7 +51,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
|
|
@ -71,7 +71,7 @@ import java.util.*;
|
||||||
|
|
||||||
public class VariantOverlapAnnotatorUnitTest extends BaseTest {
|
public class VariantOverlapAnnotatorUnitTest extends BaseTest {
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public void setup() throws FileNotFoundException {
|
public void setup() throws FileNotFoundException {
|
||||||
|
|
|
||||||
|
|
@ -76,5 +76,4 @@ public class BaseQualitySumPerAlleleBySampleUnitTest {
|
||||||
Assert.assertFalse(a.isUsableRead(read));
|
Assert.assertFalse(a.isUsableRead(read));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,7 @@
|
||||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.tribble.readers.LineIterator;
|
import htsjdk.tribble.readers.LineIterator;
|
||||||
import htsjdk.tribble.readers.PositionalBufferedStream;
|
import htsjdk.tribble.readers.PositionalBufferedStream;
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
|
|
@ -207,7 +208,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
|
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
|
||||||
try {
|
try {
|
||||||
final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(b37KGReference));
|
final ReferenceSequenceFile fasta = new IndexedFastaSequenceFile(new File(b37KGReference));
|
||||||
final GenomeLocParser parser = new GenomeLocParser(fasta.getSequenceDictionary());
|
final GenomeLocParser parser = new GenomeLocParser(fasta.getSequenceDictionary());
|
||||||
|
|
||||||
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, bam) + " -L 20:10,001,603-10,001,642 -L 20:10,001,653-10,001,742 --no_cmdline_in_header -o %s";
|
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, bam) + " -L 20:10,001,603-10,001,642 -L 20:10,001,653-10,001,742 --no_cmdline_in_header -o %s";
|
||||||
|
|
|
||||||
|
|
@ -51,11 +51,11 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.Cigar;
|
import htsjdk.samtools.Cigar;
|
||||||
import htsjdk.samtools.CigarElement;
|
import htsjdk.samtools.CigarElement;
|
||||||
import htsjdk.samtools.CigarOperator;
|
import htsjdk.samtools.CigarOperator;
|
||||||
import htsjdk.samtools.SAMFileHeader;
|
import htsjdk.samtools.SAMFileHeader;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
|
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
|
|
@ -80,7 +80,7 @@ import java.util.*;
|
||||||
|
|
||||||
public class LocalAssemblyEngineUnitTest extends BaseTest {
|
public class LocalAssemblyEngineUnitTest extends BaseTest {
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
private SAMFileHeader header;
|
private SAMFileHeader header;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
|
|
|
||||||
|
|
@ -51,9 +51,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.indels;
|
package org.broadinstitute.gatk.tools.walkers.indels;
|
||||||
|
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.SAMFileHeader;
|
import htsjdk.samtools.SAMFileHeader;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.UnvalidatingGenomeLoc;
|
import org.broadinstitute.gatk.utils.UnvalidatingGenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
|
@ -76,7 +75,7 @@ public class PairHMMIndelErrorModelUnitTest extends BaseTest {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public void setup() throws FileNotFoundException {
|
public void setup() throws FileNotFoundException {
|
||||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||||
header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -51,9 +51,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.utils;
|
package org.broadinstitute.gatk.utils;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.SAMSequenceDictionary;
|
import htsjdk.samtools.SAMSequenceDictionary;
|
||||||
import htsjdk.samtools.SAMSequenceRecord;
|
import htsjdk.samtools.SAMSequenceRecord;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
|
|
@ -71,7 +71,7 @@ public class ContigComparatorUnitTest extends BaseTest {
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public void setup() throws FileNotFoundException {
|
public void setup() throws FileNotFoundException {
|
||||||
// sequence
|
// sequence
|
||||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(seq);
|
final GenomeLocParser genomeLocParser = new GenomeLocParser(seq);
|
||||||
dictForFails = genomeLocParser.getContigs();
|
dictForFails = genomeLocParser.getContigs();
|
||||||
}
|
}
|
||||||
|
|
@ -81,7 +81,7 @@ public class ContigComparatorUnitTest extends BaseTest {
|
||||||
List<Object[]> tests = new ArrayList<Object[]>();
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
for ( final String ref : Arrays.asList(b37KGReference, hg18Reference) ) {
|
for ( final String ref : Arrays.asList(b37KGReference, hg18Reference) ) {
|
||||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(ref));
|
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(ref));
|
||||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(seq);
|
final GenomeLocParser genomeLocParser = new GenomeLocParser(seq);
|
||||||
final SAMSequenceDictionary dict = genomeLocParser.getContigs();
|
final SAMSequenceDictionary dict = genomeLocParser.getContigs();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,7 @@
|
||||||
package org.broadinstitute.gatk.utils.genotyper;
|
package org.broadinstitute.gatk.utils.genotyper;
|
||||||
|
|
||||||
import htsjdk.samtools.*;
|
import htsjdk.samtools.*;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import htsjdk.variant.variantcontext.Allele;
|
import htsjdk.variant.variantcontext.Allele;
|
||||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||||
|
|
@ -64,7 +65,6 @@ import java.util.Map;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
|
@ -81,7 +81,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest {
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
// example fasta index file, can be deleted if you don't use the reference
|
// example fasta index file, can be deleted if you don't use the reference
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public void setup() throws FileNotFoundException {
|
public void setup() throws FileNotFoundException {
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.gatk.engine.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||||
import org.broadinstitute.gatk.utils.refdata.utils.RMDTriplet;
|
import org.broadinstitute.gatk.utils.refdata.utils.RMDTriplet;
|
||||||
import org.broadinstitute.gatk.engine.walkers.Walker;
|
import org.broadinstitute.gatk.engine.walkers.Walker;
|
||||||
import org.broadinstitute.gatk.utils.text.ListFileUtils;
|
import org.broadinstitute.gatk.utils.text.ListFileUtils;
|
||||||
|
import htsjdk.samtools.sra.SRAAccession;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
@ -63,10 +64,19 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
||||||
/**
|
/**
|
||||||
* A list of all the arguments initially used as sources.
|
* A list of all the arguments initially used as sources.
|
||||||
*/
|
*/
|
||||||
private final Collection<Object> argumentSources = new ArrayList<Object>();
|
private final Collection<Object> argumentSources = new ArrayList<>();
|
||||||
|
|
||||||
protected static Logger logger = Logger.getLogger(CommandLineExecutable.class);
|
protected static Logger logger = Logger.getLogger(CommandLineExecutable.class);
|
||||||
|
|
||||||
|
private final static String SRA_LIBS_DOWNLOAD = "samjdk.sra_libraries_download";
|
||||||
|
/**
|
||||||
|
* Set GATK version to be used as part of user agent for network requests
|
||||||
|
*/
|
||||||
|
static {
|
||||||
|
System.setProperty(SRA_LIBS_DOWNLOAD, "true");
|
||||||
|
SRAAccession.setAppVersionString("GATK " + getVersionNumber());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* this is the function that the inheriting class can expect to have called
|
* this is the function that the inheriting class can expect to have called
|
||||||
* when the command line system has initialized.
|
* when the command line system has initialized.
|
||||||
|
|
|
||||||
|
|
@ -905,7 +905,7 @@ public class GenomeAnalysisEngine {
|
||||||
* @return A data source for the given set of reads.
|
* @return A data source for the given set of reads.
|
||||||
*/
|
*/
|
||||||
private SAMDataSource createReadsDataSource(final GATKArgumentCollection argCollection, final GenomeLocParser genomeLocParser,
|
private SAMDataSource createReadsDataSource(final GATKArgumentCollection argCollection, final GenomeLocParser genomeLocParser,
|
||||||
final IndexedFastaSequenceFile refReader, final Map<String, String> sampleRenameMap) {
|
final ReferenceSequenceFile refReader, final Map<String, String> sampleRenameMap) {
|
||||||
DownsamplingMethod downsamplingMethod = getDownsamplingMethod();
|
DownsamplingMethod downsamplingMethod = getDownsamplingMethod();
|
||||||
|
|
||||||
// Synchronize the method back into the collection so that it shows up when
|
// Synchronize the method back into the collection so that it shows up when
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.gatk.engine.alignment.bwa.java;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||||
import htsjdk.samtools.*;
|
import htsjdk.samtools.*;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.engine.alignment.Aligner;
|
import org.broadinstitute.gatk.engine.alignment.Aligner;
|
||||||
import org.broadinstitute.gatk.engine.alignment.Alignment;
|
import org.broadinstitute.gatk.engine.alignment.Alignment;
|
||||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||||
|
|
@ -126,7 +127,7 @@ public class AlignerTestHarness {
|
||||||
|
|
||||||
mismatches++;
|
mismatches++;
|
||||||
|
|
||||||
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
final ReferenceSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
||||||
|
|
||||||
System.out.printf("read = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(read.getReadString(),read.getCigar(),CigarOperator.DELETION),
|
System.out.printf("read = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(read.getReadString(),read.getCigar(),CigarOperator.DELETION),
|
||||||
read.getAlignmentStart(),
|
read.getAlignmentStart(),
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.datasources.providers;
|
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.engine.ReadProperties;
|
import org.broadinstitute.gatk.engine.ReadProperties;
|
||||||
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
||||||
import org.broadinstitute.gatk.engine.datasources.rmd.ReferenceOrderedDataSource;
|
import org.broadinstitute.gatk.engine.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
|
|
@ -62,7 +62,7 @@ public class LocusShardDataProvider extends ShardDataProvider {
|
||||||
* @param shard The chunk of data over which traversals happen.
|
* @param shard The chunk of data over which traversals happen.
|
||||||
* @param reference A getter for a section of the reference.
|
* @param reference A getter for a section of the reference.
|
||||||
*/
|
*/
|
||||||
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
public LocusShardDataProvider(final Shard shard, final ReadProperties sourceInfo, final GenomeLocParser genomeLocParser, final GenomeLoc locus, final LocusIterator locusIterator, final ReferenceSequenceFile reference, final Collection<ReferenceOrderedDataSource> rods) {
|
||||||
super(shard,genomeLocParser,reference,rods);
|
super(shard,genomeLocParser,reference,rods);
|
||||||
this.sourceInfo = sourceInfo;
|
this.sourceInfo = sourceInfo;
|
||||||
this.locus = locus;
|
this.locus = locus;
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.gatk.engine.datasources.providers;
|
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
||||||
import org.broadinstitute.gatk.engine.datasources.rmd.ReferenceOrderedDataSource;
|
import org.broadinstitute.gatk.engine.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
||||||
|
|
@ -50,7 +51,7 @@ public class ReadShardDataProvider extends ShardDataProvider {
|
||||||
* @param shard The chunk of data over which traversals happen.
|
* @param shard The chunk of data over which traversals happen.
|
||||||
* @param reference A getter for a section of the reference.
|
* @param reference A getter for a section of the reference.
|
||||||
*/
|
*/
|
||||||
public ReadShardDataProvider(Shard shard, GenomeLocParser genomeLocParser, GATKSAMIterator reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
public ReadShardDataProvider(final Shard shard, final GenomeLocParser genomeLocParser, final GATKSAMIterator reads, final ReferenceSequenceFile reference, final Collection<ReferenceOrderedDataSource> rods) {
|
||||||
super(shard,genomeLocParser,reference,rods);
|
super(shard,genomeLocParser,reference,rods);
|
||||||
this.reads = reads;
|
this.reads = reads;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||||
import htsjdk.samtools.reference.ReferenceSequence;
|
import htsjdk.samtools.reference.ReferenceSequence;
|
||||||
import htsjdk.samtools.SAMRecord;
|
import htsjdk.samtools.SAMRecord;
|
||||||
import htsjdk.samtools.SAMSequenceRecord;
|
import htsjdk.samtools.SAMSequenceRecord;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||||
|
|
@ -61,13 +62,13 @@ public class ReferenceView implements View {
|
||||||
/**
|
/**
|
||||||
* The source of reference data.
|
* The source of reference data.
|
||||||
*/
|
*/
|
||||||
protected IndexedFastaSequenceFile reference = null;
|
protected ReferenceSequenceFile reference = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new ReferenceView.
|
* Create a new ReferenceView.
|
||||||
* @param provider
|
* @param provider
|
||||||
*/
|
*/
|
||||||
public ReferenceView( ShardDataProvider provider ) {
|
public ReferenceView( final ShardDataProvider provider ) {
|
||||||
this.genomeLocParser = provider.getGenomeLocParser();
|
this.genomeLocParser = provider.getGenomeLocParser();
|
||||||
this.reference = provider.getReference();
|
this.reference = provider.getReference();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.datasources.providers;
|
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
||||||
import org.broadinstitute.gatk.engine.datasources.rmd.ReferenceOrderedDataSource;
|
import org.broadinstitute.gatk.engine.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
|
|
@ -70,7 +70,7 @@ public abstract class ShardDataProvider {
|
||||||
/**
|
/**
|
||||||
* Provider of reference data for this particular shard.
|
* Provider of reference data for this particular shard.
|
||||||
*/
|
*/
|
||||||
private final IndexedFastaSequenceFile reference;
|
private final ReferenceSequenceFile reference;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sources of reference-ordered data.
|
* Sources of reference-ordered data.
|
||||||
|
|
@ -106,7 +106,7 @@ public abstract class ShardDataProvider {
|
||||||
* Gets a pointer into the given indexed fasta sequence file.
|
* Gets a pointer into the given indexed fasta sequence file.
|
||||||
* @return The indexed fasta sequence file.
|
* @return The indexed fasta sequence file.
|
||||||
*/
|
*/
|
||||||
IndexedFastaSequenceFile getReference() {
|
ReferenceSequenceFile getReference() {
|
||||||
return reference;
|
return reference;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -131,7 +131,7 @@ public abstract class ShardDataProvider {
|
||||||
* @param shard The chunk of data over which traversals happen.
|
* @param shard The chunk of data over which traversals happen.
|
||||||
* @param reference A getter for a section of the reference.
|
* @param reference A getter for a section of the reference.
|
||||||
*/
|
*/
|
||||||
public ShardDataProvider(Shard shard,GenomeLocParser genomeLocParser,IndexedFastaSequenceFile reference,Collection<ReferenceOrderedDataSource> rods) {
|
public ShardDataProvider(final Shard shard, final GenomeLocParser genomeLocParser, final ReferenceSequenceFile reference, final Collection<ReferenceOrderedDataSource> rods) {
|
||||||
this.shard = shard;
|
this.shard = shard;
|
||||||
this.genomeLocParser = genomeLocParser;
|
this.genomeLocParser = genomeLocParser;
|
||||||
this.reference = reference;
|
this.reference = reference;
|
||||||
|
|
|
||||||
|
|
@ -25,20 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.datasources.reads;
|
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||||
|
|
||||||
import htsjdk.samtools.*;
|
import htsjdk.samtools.Bin;
|
||||||
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
|
|
||||||
import htsjdk.samtools.seekablestream.SeekableFileStream;
|
|
||||||
import htsjdk.samtools.seekablestream.SeekableStream;
|
|
||||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.ByteOrder;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A basic interface for querying BAM indices.
|
* A basic interface for querying BAM indices.
|
||||||
|
|
@ -47,116 +34,24 @@ import java.util.List;
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class GATKBAMIndex {
|
public abstract class GATKBAMIndex {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* BAM index file magic number.
|
* What is the starting bin for each level?
|
||||||
*/
|
*/
|
||||||
private static final byte[] BAM_INDEX_MAGIC = "BAI\1".getBytes();
|
protected static final int[] LEVEL_STARTS = {0,1,9,73,585,4681};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reports the total amount of genomic data that any bin can index.
|
* Reports the total amount of genomic data that any bin can index.
|
||||||
*/
|
*/
|
||||||
protected static final int BIN_GENOMIC_SPAN = 512*1024*1024;
|
protected static final int BIN_GENOMIC_SPAN = 512*1024*1024;
|
||||||
|
|
||||||
/**
|
|
||||||
* What is the starting bin for each level?
|
|
||||||
*/
|
|
||||||
private static final int[] LEVEL_STARTS = {0,1,9,73,585,4681};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reports the maximum number of bins that can appear in a BAM file.
|
* Reports the maximum number of bins that can appear in a BAM file.
|
||||||
*/
|
*/
|
||||||
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
|
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
|
||||||
|
|
||||||
private final SAMSequenceDictionary sequenceDictionary;
|
public abstract GATKBAMIndexData readReferenceSequence(final int referenceSequence);
|
||||||
private final File mFile;
|
|
||||||
|
|
||||||
//TODO: figure out a good value for this buffer size
|
|
||||||
private static final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Number of sequences stored in this index.
|
|
||||||
*/
|
|
||||||
private final int sequenceCount;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A cache of the starting positions of the sequences.
|
|
||||||
*/
|
|
||||||
private final long[] sequenceStartCache;
|
|
||||||
|
|
||||||
private SeekableFileStream fileStream;
|
|
||||||
private SeekableStream baiStream;
|
|
||||||
private SeekableBufferedStream bufferedStream;
|
|
||||||
private long fileLength;
|
|
||||||
|
|
||||||
public GATKBAMIndex(final File file, final SAMSequenceDictionary sequenceDictionary) {
|
|
||||||
mFile = file;
|
|
||||||
this.sequenceDictionary = sequenceDictionary;
|
|
||||||
|
|
||||||
// Open the file stream.
|
|
||||||
openIndexFile();
|
|
||||||
|
|
||||||
// Verify the magic number.
|
|
||||||
seek(0);
|
|
||||||
final byte[] buffer = readBytes(4);
|
|
||||||
if (!Arrays.equals(buffer, BAM_INDEX_MAGIC)) {
|
|
||||||
throw new ReviewedGATKException("Invalid file header in BAM index " + mFile +
|
|
||||||
": " + new String(buffer));
|
|
||||||
}
|
|
||||||
|
|
||||||
seek(4);
|
|
||||||
|
|
||||||
sequenceCount = readInteger();
|
|
||||||
|
|
||||||
// Create a cache of the starting position of each sequence. Initialize it to -1.
|
|
||||||
sequenceStartCache = new long[sequenceCount];
|
|
||||||
for(int i = 1; i < sequenceCount; i++)
|
|
||||||
sequenceStartCache[i] = -1;
|
|
||||||
|
|
||||||
// Seed the first element in the array with the current position.
|
|
||||||
if(sequenceCount > 0)
|
|
||||||
sequenceStartCache[0] = position();
|
|
||||||
|
|
||||||
closeIndexFile();
|
|
||||||
}
|
|
||||||
|
|
||||||
public GATKBAMIndexData readReferenceSequence(final int referenceSequence) {
|
|
||||||
openIndexFile();
|
|
||||||
|
|
||||||
if (referenceSequence >= sequenceCount)
|
|
||||||
throw new ReviewedGATKException("Invalid sequence number " + referenceSequence + " in index file " + mFile);
|
|
||||||
|
|
||||||
skipToSequence(referenceSequence);
|
|
||||||
|
|
||||||
int binCount = readInteger();
|
|
||||||
List<GATKBin> bins = new ArrayList<>();
|
|
||||||
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
|
||||||
final int indexBin = readInteger();
|
|
||||||
final int nChunks = readInteger();
|
|
||||||
|
|
||||||
List<GATKChunk> chunks = new ArrayList<>(nChunks);
|
|
||||||
long[] rawChunkData = readLongs(nChunks*2);
|
|
||||||
for (int ci = 0; ci < nChunks; ci++) {
|
|
||||||
final long chunkBegin = rawChunkData[ci*2];
|
|
||||||
final long chunkEnd = rawChunkData[ci*2+1];
|
|
||||||
chunks.add(new GATKChunk(chunkBegin, chunkEnd));
|
|
||||||
}
|
|
||||||
GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
|
||||||
bin.setChunkList(chunks.toArray(new GATKChunk[chunks.size()]));
|
|
||||||
while(indexBin >= bins.size())
|
|
||||||
bins.add(null);
|
|
||||||
bins.set(indexBin,bin);
|
|
||||||
}
|
|
||||||
|
|
||||||
final int nLinearBins = readInteger();
|
|
||||||
long[] linearIndexEntries = readLongs(nLinearBins);
|
|
||||||
|
|
||||||
LinearIndex linearIndex = new LinearIndex(referenceSequence,0,linearIndexEntries);
|
|
||||||
|
|
||||||
closeIndexFile();
|
|
||||||
|
|
||||||
return new GATKBAMIndexData(this,referenceSequence,bins,linearIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the number of levels employed by this index.
|
* Get the number of levels employed by this index.
|
||||||
|
|
@ -180,91 +75,35 @@ public class GATKBAMIndex {
|
||||||
* @param levelNumber Level number. 0-based.
|
* @param levelNumber Level number. 0-based.
|
||||||
* @return The size (number of possible bins) of the given level.
|
* @return The size (number of possible bins) of the given level.
|
||||||
*/
|
*/
|
||||||
public int getLevelSize(final int levelNumber) {
|
public abstract int getLevelSize(final int levelNumber);
|
||||||
if(levelNumber == getNumIndexLevels()-1)
|
|
||||||
return MAX_BINS-LEVEL_STARTS[levelNumber]-1;
|
|
||||||
else
|
|
||||||
return LEVEL_STARTS[levelNumber+1]-LEVEL_STARTS[levelNumber];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the level associated with the given bin number.
|
* Gets the level associated with the given bin number.
|
||||||
* @param bin The bin for which to determine the level.
|
* @param bin The bin for which to determine the level.
|
||||||
* @return the level associated with the given bin number.
|
* @return the level associated with the given bin number.
|
||||||
*/
|
*/
|
||||||
public int getLevelForBin(final Bin bin) {
|
public abstract int getLevelForBin(final Bin bin);
|
||||||
GATKBin gatkBin = new GATKBin(bin);
|
|
||||||
if(gatkBin.getBinNumber() >= MAX_BINS)
|
|
||||||
throw new ReviewedGATKException("Tried to get level for invalid bin in index file " + mFile);
|
|
||||||
for(int i = getNumIndexLevels()-1; i >= 0; i--) {
|
|
||||||
if(gatkBin.getBinNumber() >= LEVEL_STARTS[i])
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
throw new ReviewedGATKException("Unable to find correct bin for bin " + bin + " in index file " + mFile);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the first locus that this bin can index into.
|
* Gets the first locus that this bin can index into.
|
||||||
* @param bin The bin to test.
|
* @param bin The bin to test.
|
||||||
* @return The last position that the given bin can represent.
|
* @return The last position that the given bin can represent.
|
||||||
*/
|
*/
|
||||||
public int getFirstLocusInBin(final Bin bin) {
|
public abstract int getFirstLocusInBin(final Bin bin);
|
||||||
final int level = getLevelForBin(bin);
|
|
||||||
final int levelStart = LEVEL_STARTS[level];
|
|
||||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
|
||||||
return (new GATKBin(bin).getBinNumber() - levelStart)*(BIN_GENOMIC_SPAN /levelSize)+1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the last locus that this bin can index into.
|
* Gets the last locus that this bin can index into.
|
||||||
* @param bin The bin to test.
|
* @param bin The bin to test.
|
||||||
* @return The last position that the given bin can represent.
|
* @return The last position that the given bin can represent.
|
||||||
*/
|
*/
|
||||||
public int getLastLocusInBin(final Bin bin) {
|
public abstract int getLastLocusInBin(final Bin bin);
|
||||||
final int level = getLevelForBin(bin);
|
|
||||||
final int levelStart = LEVEL_STARTS[level];
|
|
||||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
|
||||||
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Use to get close to the unmapped reads at the end of a BAM file.
|
* Use to get close to the unmapped reads at the end of a BAM file.
|
||||||
* @return The file offset of the first record in the last linear bin, or -1
|
* @return The file offset of the first record in the last linear bin, or -1
|
||||||
* if there are no elements in linear bins (i.e. no mapped reads).
|
* if there are no elements in linear bins (i.e. no mapped reads).
|
||||||
*/
|
*/
|
||||||
public long getStartOfLastLinearBin() {
|
public abstract long getStartOfLastLinearBin();
|
||||||
openIndexFile();
|
|
||||||
|
|
||||||
seek(4);
|
|
||||||
|
|
||||||
final int sequenceCount = readInteger();
|
|
||||||
// Because no reads may align to the last sequence in the sequence dictionary,
|
|
||||||
// grab the last element of the linear index for each sequence, and return
|
|
||||||
// the last one from the last sequence that has one.
|
|
||||||
long lastLinearIndexPointer = -1;
|
|
||||||
for (int i = 0; i < sequenceCount; i++) {
|
|
||||||
// System.out.println("# Sequence TID: " + i);
|
|
||||||
final int nBins = readInteger();
|
|
||||||
// System.out.println("# nBins: " + nBins);
|
|
||||||
for (int j1 = 0; j1 < nBins; j1++) {
|
|
||||||
// Skip bin #
|
|
||||||
skipBytes(4);
|
|
||||||
final int nChunks = readInteger();
|
|
||||||
// Skip chunks
|
|
||||||
skipBytes(16 * nChunks);
|
|
||||||
}
|
|
||||||
final int nLinearBins = readInteger();
|
|
||||||
if (nLinearBins > 0) {
|
|
||||||
// Skip to last element of list of linear bins
|
|
||||||
skipBytes(8 * (nLinearBins - 1));
|
|
||||||
lastLinearIndexPointer = readLongs(1)[0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
closeIndexFile();
|
|
||||||
|
|
||||||
return lastLinearIndexPointer;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the possible number of bins for a given reference sequence.
|
* Gets the possible number of bins for a given reference sequence.
|
||||||
|
|
@ -273,197 +112,4 @@ public class GATKBAMIndex {
|
||||||
protected int getMaxAddressibleGenomicLocation() {
|
protected int getMaxAddressibleGenomicLocation() {
|
||||||
return BIN_GENOMIC_SPAN;
|
return BIN_GENOMIC_SPAN;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void skipToSequence(final int referenceSequence) {
|
|
||||||
// Find the offset in the file of the last sequence whose position has been determined. Start here
|
|
||||||
// when searching the sequence for the next value to read. (Note that sequenceStartCache[0] will always
|
|
||||||
// be present, so no extra stopping condition is necessary.
|
|
||||||
int sequenceIndex = referenceSequence;
|
|
||||||
while(sequenceStartCache[sequenceIndex] == -1)
|
|
||||||
sequenceIndex--;
|
|
||||||
|
|
||||||
// Advance to the most recently found position.
|
|
||||||
seek(sequenceStartCache[sequenceIndex]);
|
|
||||||
|
|
||||||
for (int i = sequenceIndex; i < referenceSequence; i++) {
|
|
||||||
sequenceStartCache[i] = position();
|
|
||||||
// System.out.println("# Sequence TID: " + i);
|
|
||||||
final int nBins = readInteger();
|
|
||||||
// System.out.println("# nBins: " + nBins);
|
|
||||||
for (int j = 0; j < nBins; j++) {
|
|
||||||
/* final int bin = */
|
|
||||||
readInteger();
|
|
||||||
final int nChunks = readInteger();
|
|
||||||
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
|
||||||
skipBytes(16 * nChunks);
|
|
||||||
}
|
|
||||||
final int nLinearBins = readInteger();
|
|
||||||
// System.out.println("# nLinearBins: " + nLinearBins);
|
|
||||||
skipBytes(8 * nLinearBins);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
sequenceStartCache[referenceSequence] = position();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private void openIndexFile() {
|
|
||||||
try {
|
|
||||||
fileStream = new SeekableFileStream(mFile);
|
|
||||||
baiStream = SamIndexes.asBaiSeekableStreamOrNull(fileStream, sequenceDictionary);
|
|
||||||
bufferedStream = new SeekableBufferedStream(baiStream, BUFFERED_STREAM_BUFFER_SIZE);
|
|
||||||
fileLength=bufferedStream.length();
|
|
||||||
}
|
|
||||||
catch (IOException exc) {
|
|
||||||
throw new ReviewedGATKException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void closeIndexFile() {
|
|
||||||
try {
|
|
||||||
bufferedStream.close();
|
|
||||||
baiStream.close();
|
|
||||||
fileStream.close();
|
|
||||||
fileLength = -1;
|
|
||||||
}
|
|
||||||
catch (IOException exc) {
|
|
||||||
throw new ReviewedGATKException("Unable to close index file " + mFile, exc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8;
|
|
||||||
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
|
|
||||||
|
|
||||||
private byte[] readBytes(int count) {
|
|
||||||
ByteBuffer buffer = getBuffer(count);
|
|
||||||
read(buffer);
|
|
||||||
buffer.flip();
|
|
||||||
byte[] contents = new byte[count];
|
|
||||||
buffer.get(contents);
|
|
||||||
return contents;
|
|
||||||
}
|
|
||||||
|
|
||||||
private int readInteger() {
|
|
||||||
ByteBuffer buffer = getBuffer(INT_SIZE_IN_BYTES);
|
|
||||||
read(buffer);
|
|
||||||
buffer.flip();
|
|
||||||
return buffer.getInt();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads an array of <count> longs from the file channel, returning the results as an array.
|
|
||||||
* @param count Number of longs to read.
|
|
||||||
* @return An array of longs. Size of array should match count.
|
|
||||||
*/
|
|
||||||
private long[] readLongs(final int count) {
|
|
||||||
ByteBuffer buffer = getBuffer(count*LONG_SIZE_IN_BYTES);
|
|
||||||
read(buffer);
|
|
||||||
buffer.flip();
|
|
||||||
long[] result = new long[count];
|
|
||||||
for(int i = 0; i < count; i++)
|
|
||||||
result[i] = buffer.getLong();
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void read(final ByteBuffer buffer) {
|
|
||||||
final int bytesRequested = buffer.limit();
|
|
||||||
if (bytesRequested == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
try {
|
|
||||||
|
|
||||||
//BufferedInputStream cannot read directly into a byte buffer, so we read into an array
|
|
||||||
//and put the result into the bytebuffer after the if statement.
|
|
||||||
|
|
||||||
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
|
||||||
// our buffer to -- if there isn't enough data in the file, the index
|
|
||||||
// must be truncated or otherwise corrupt:
|
|
||||||
if(bytesRequested > fileLength - bufferedStream.position()){
|
|
||||||
throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " +
|
|
||||||
"It's likely that this file is truncated or corrupt -- " +
|
|
||||||
"Please try re-indexing the corresponding BAM file.",
|
|
||||||
mFile));
|
|
||||||
}
|
|
||||||
|
|
||||||
int bytesRead = bufferedStream.read(byteArray, 0, bytesRequested);
|
|
||||||
|
|
||||||
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
|
||||||
// our buffer to -- if we encounter EOF (-1), the index
|
|
||||||
// must be truncated or otherwise corrupt:
|
|
||||||
if (bytesRead <= 0) {
|
|
||||||
throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " +
|
|
||||||
"It's likely that this file is truncated or corrupt -- " +
|
|
||||||
"Please try re-indexing the corresponding BAM file.",
|
|
||||||
mFile));
|
|
||||||
}
|
|
||||||
|
|
||||||
if(bytesRead != bytesRequested)
|
|
||||||
throw new RuntimeException("Read amount different from requested amount. This should not happen.");
|
|
||||||
|
|
||||||
buffer.put(byteArray, 0, bytesRequested);
|
|
||||||
}
|
|
||||||
catch(IOException ex) {
|
|
||||||
throw new ReviewedGATKException("Index: unable to read bytes from index file " + mFile);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A reusable buffer for use by this index generator.
|
|
||||||
* TODO: Should this be a SoftReference?
|
|
||||||
*/
|
|
||||||
private ByteBuffer buffer = null;
|
|
||||||
|
|
||||||
//BufferedStream don't read into ByteBuffers, so we need this temporary array
|
|
||||||
private byte[] byteArray=null;
|
|
||||||
private ByteBuffer getBuffer(final int size) {
|
|
||||||
if(buffer == null || buffer.capacity() < size) {
|
|
||||||
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
|
|
||||||
buffer = ByteBuffer.allocate(size);
|
|
||||||
byteArray = new byte[size];
|
|
||||||
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
|
||||||
}
|
|
||||||
buffer.clear();
|
|
||||||
buffer.limit(size);
|
|
||||||
return buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void skipBytes(final int count) {
|
|
||||||
try {
|
|
||||||
|
|
||||||
//try to skip forward the requested amount.
|
|
||||||
long skipped = bufferedStream.skip(count);
|
|
||||||
|
|
||||||
if( skipped != count ) { //if not managed to skip the requested amount
|
|
||||||
throw new ReviewedGATKException("Index: unable to reposition file channel of index file " + mFile);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch(IOException ex) {
|
|
||||||
throw new ReviewedGATKException("Index: unable to reposition file channel of index file " + mFile);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void seek(final long position) {
|
|
||||||
try {
|
|
||||||
//to seek a new position, move the fileChannel, and reposition the bufferedStream
|
|
||||||
bufferedStream.seek(position);
|
|
||||||
}
|
|
||||||
catch(IOException ex) {
|
|
||||||
throw new ReviewedGATKException("Index: unable to reposition of file channel of index file " + mFile);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieve the position from the current file channel.
|
|
||||||
* @return position of the current file channel.
|
|
||||||
*/
|
|
||||||
private long position() {
|
|
||||||
try {
|
|
||||||
return bufferedStream.position();
|
|
||||||
}
|
|
||||||
catch (IOException exc) {
|
|
||||||
throw new ReviewedGATKException("Unable to read position from index file " + mFile, exc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,109 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2012-2016 Broad Institute, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||||
|
|
||||||
|
import htsjdk.samtools.*;
|
||||||
|
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class implements BAM index querying API
|
||||||
|
* by wrapping a class BrowseableBAMIndex from HTSJDK.
|
||||||
|
*
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class GATKBAMIndexFromDataSource extends GATKBAMIndex {
|
||||||
|
private File sourceFile;
|
||||||
|
private SAMFileHeader sourceHeader;
|
||||||
|
private BrowseableBAMIndex index;
|
||||||
|
|
||||||
|
public GATKBAMIndexFromDataSource(final File sourceFile, final SAMFileHeader sourceHeader, final BrowseableBAMIndex index) {
|
||||||
|
this.sourceFile = sourceFile;
|
||||||
|
this.sourceHeader = sourceHeader;
|
||||||
|
this.index = index;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public GATKBAMIndexData readReferenceSequence(final int referenceSequence) {
|
||||||
|
final List<SAMSequenceRecord> sequences = sourceHeader.getSequenceDictionary().getSequences();
|
||||||
|
if (referenceSequence >= sequences.size())
|
||||||
|
throw new ReviewedGATKException("Sequence number " + referenceSequence + " cannot be greater or equal to " + sequences.size() + " in index file " + sourceFile);
|
||||||
|
|
||||||
|
|
||||||
|
final BinList sourceBins = index.getBinsOverlapping(referenceSequence, 0, sequences.get(referenceSequence).getSequenceLength());
|
||||||
|
|
||||||
|
final List<GATKBin> bins = new ArrayList<>();
|
||||||
|
for (Bin sourceBin : sourceBins) {
|
||||||
|
final int indexBin = sourceBin.getBinNumber();
|
||||||
|
while(indexBin >= bins.size())
|
||||||
|
bins.add(null);
|
||||||
|
|
||||||
|
final GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
||||||
|
final List<Chunk> chunks = index.getSpanOverlapping(sourceBin).getChunks();
|
||||||
|
final List<GATKChunk> gatkChunks = new ArrayList<>(chunks.size());
|
||||||
|
for (Chunk chunk : chunks) {
|
||||||
|
gatkChunks.add(new GATKChunk(chunk));
|
||||||
|
}
|
||||||
|
|
||||||
|
bin.setChunkList(gatkChunks.toArray(new GATKChunk[gatkChunks.size()]));
|
||||||
|
|
||||||
|
bins.set(indexBin, bin);
|
||||||
|
}
|
||||||
|
|
||||||
|
// there is no interface to get linear index from HTSJDK
|
||||||
|
final LinearIndex linearIndex = new LinearIndex(referenceSequence, 0, new long[]{});
|
||||||
|
|
||||||
|
return new GATKBAMIndexData(this,referenceSequence,bins,linearIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getLevelSize(int levelNumber) {
|
||||||
|
return index.getLevelSize(levelNumber);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getLevelForBin(Bin bin) {
|
||||||
|
return index.getLevelForBin(bin);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getFirstLocusInBin(Bin bin) {
|
||||||
|
return index.getFirstLocusInBin(bin);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getLastLocusInBin(Bin bin) {
|
||||||
|
return index.getLastLocusInBin(bin);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getStartOfLastLinearBin() {
|
||||||
|
return index.getStartOfLastLinearBin();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,435 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2012-2016 Broad Institute, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||||
|
|
||||||
|
import htsjdk.samtools.*;
|
||||||
|
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
|
||||||
|
import htsjdk.samtools.seekablestream.SeekableFileStream;
|
||||||
|
import htsjdk.samtools.seekablestream.SeekableStream;
|
||||||
|
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||||
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class implements BAM index querying API
|
||||||
|
* by reading .bai file directly.
|
||||||
|
* Very much not thread-safe.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class GATKBAMIndexFromFile extends GATKBAMIndex {
|
||||||
|
/**
|
||||||
|
* BAM index file magic number.
|
||||||
|
*/
|
||||||
|
private static final byte[] BAM_INDEX_MAGIC = "BAI\1".getBytes();
|
||||||
|
|
||||||
|
private final SAMSequenceDictionary sequenceDictionary;
|
||||||
|
private final File mFile;
|
||||||
|
|
||||||
|
//TODO: figure out a good value for this buffer size
|
||||||
|
private static final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of sequences stored in this index.
|
||||||
|
*/
|
||||||
|
private final int sequenceCount;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A cache of the starting positions of the sequences.
|
||||||
|
*/
|
||||||
|
private final long[] sequenceStartCache;
|
||||||
|
|
||||||
|
private SeekableFileStream fileStream;
|
||||||
|
private SeekableStream baiStream;
|
||||||
|
private SeekableBufferedStream bufferedStream;
|
||||||
|
private long fileLength;
|
||||||
|
|
||||||
|
public GATKBAMIndexFromFile(final File file, final SAMSequenceDictionary sequenceDictionary) {
|
||||||
|
mFile = file;
|
||||||
|
this.sequenceDictionary = sequenceDictionary;
|
||||||
|
|
||||||
|
// Open the file stream.
|
||||||
|
openIndexFile();
|
||||||
|
|
||||||
|
// Verify the magic number.
|
||||||
|
seek(0);
|
||||||
|
final byte[] buffer = readBytes(4);
|
||||||
|
if (!Arrays.equals(buffer, BAM_INDEX_MAGIC)) {
|
||||||
|
throw new ReviewedGATKException("Invalid file header in BAM index " + mFile +
|
||||||
|
": " + new String(buffer));
|
||||||
|
}
|
||||||
|
|
||||||
|
seek(4);
|
||||||
|
|
||||||
|
sequenceCount = readInteger();
|
||||||
|
|
||||||
|
// Create a cache of the starting position of each sequence. Initialize it to -1.
|
||||||
|
sequenceStartCache = new long[sequenceCount];
|
||||||
|
for(int i = 1; i < sequenceCount; i++)
|
||||||
|
sequenceStartCache[i] = -1;
|
||||||
|
|
||||||
|
// Seed the first element in the array with the current position.
|
||||||
|
if(sequenceCount > 0)
|
||||||
|
sequenceStartCache[0] = position();
|
||||||
|
|
||||||
|
closeIndexFile();
|
||||||
|
}
|
||||||
|
|
||||||
|
public GATKBAMIndexData readReferenceSequence(final int referenceSequence) {
|
||||||
|
openIndexFile();
|
||||||
|
|
||||||
|
if (referenceSequence >= sequenceCount)
|
||||||
|
throw new ReviewedGATKException("Invalid sequence number " + referenceSequence + " in index file " + mFile);
|
||||||
|
|
||||||
|
skipToSequence(referenceSequence);
|
||||||
|
|
||||||
|
final int binCount = readInteger();
|
||||||
|
List<GATKBin> bins = new ArrayList<>();
|
||||||
|
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||||
|
final int indexBin = readInteger();
|
||||||
|
final int nChunks = readInteger();
|
||||||
|
|
||||||
|
final List<GATKChunk> chunks = new ArrayList<>(nChunks);
|
||||||
|
final long[] rawChunkData = readLongs(nChunks*2);
|
||||||
|
for (int ci = 0; ci < nChunks; ci++) {
|
||||||
|
final long chunkBegin = rawChunkData[ci*2];
|
||||||
|
final long chunkEnd = rawChunkData[ci*2+1];
|
||||||
|
chunks.add(new GATKChunk(chunkBegin, chunkEnd));
|
||||||
|
}
|
||||||
|
final GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
||||||
|
bin.setChunkList(chunks.toArray(new GATKChunk[chunks.size()]));
|
||||||
|
while(indexBin >= bins.size())
|
||||||
|
bins.add(null);
|
||||||
|
bins.set(indexBin,bin);
|
||||||
|
}
|
||||||
|
|
||||||
|
final int nLinearBins = readInteger();
|
||||||
|
long[] linearIndexEntries = readLongs(nLinearBins);
|
||||||
|
|
||||||
|
final LinearIndex linearIndex = new LinearIndex(referenceSequence,0,linearIndexEntries);
|
||||||
|
|
||||||
|
closeIndexFile();
|
||||||
|
|
||||||
|
return new GATKBAMIndexData(this,referenceSequence,bins,linearIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the number of bins in the given level.
|
||||||
|
* @param levelNumber Level number. 0-based.
|
||||||
|
* @return The size (number of possible bins) of the given level.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getLevelSize(final int levelNumber) {
|
||||||
|
if(levelNumber == getNumIndexLevels()-1)
|
||||||
|
return MAX_BINS-LEVEL_STARTS[levelNumber]-1;
|
||||||
|
else
|
||||||
|
return LEVEL_STARTS[levelNumber+1]-LEVEL_STARTS[levelNumber];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the level associated with the given bin number.
|
||||||
|
* @param bin The bin for which to determine the level.
|
||||||
|
* @return the level associated with the given bin number.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getLevelForBin(final Bin bin) {
|
||||||
|
final GATKBin gatkBin = new GATKBin(bin);
|
||||||
|
if(gatkBin.getBinNumber() >= MAX_BINS)
|
||||||
|
throw new ReviewedGATKException("Tried to get level for invalid bin in index file " + mFile);
|
||||||
|
for(int i = getNumIndexLevels()-1; i >= 0; i--) {
|
||||||
|
if(gatkBin.getBinNumber() >= LEVEL_STARTS[i])
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
throw new ReviewedGATKException("Unable to find correct bin for bin " + bin + " in index file " + mFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the first locus that this bin can index into.
|
||||||
|
* @param bin The bin to test.
|
||||||
|
* @return The last position that the given bin can represent.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getFirstLocusInBin(final Bin bin) {
|
||||||
|
final int level = getLevelForBin(bin);
|
||||||
|
final int levelStart = LEVEL_STARTS[level];
|
||||||
|
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||||
|
return (new GATKBin(bin).getBinNumber() - levelStart)*(BIN_GENOMIC_SPAN /levelSize)+1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the last locus that this bin can index into.
|
||||||
|
* @param bin The bin to test.
|
||||||
|
* @return The last position that the given bin can represent.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getLastLocusInBin(final Bin bin) {
|
||||||
|
final int level = getLevelForBin(bin);
|
||||||
|
final int levelStart = LEVEL_STARTS[level];
|
||||||
|
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||||
|
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use to get close to the unmapped reads at the end of a BAM file.
|
||||||
|
* @return The file offset of the first record in the last linear bin, or -1
|
||||||
|
* if there are no elements in linear bins (i.e. no mapped reads).
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public long getStartOfLastLinearBin() {
|
||||||
|
openIndexFile();
|
||||||
|
|
||||||
|
seek(4);
|
||||||
|
|
||||||
|
final int sequenceCount = readInteger();
|
||||||
|
// Because no reads may align to the last sequence in the sequence dictionary,
|
||||||
|
// grab the last element of the linear index for each sequence, and return
|
||||||
|
// the last one from the last sequence that has one.
|
||||||
|
long lastLinearIndexPointer = -1;
|
||||||
|
for (int i = 0; i < sequenceCount; i++) {
|
||||||
|
// System.out.println("# Sequence TID: " + i);
|
||||||
|
final int nBins = readInteger();
|
||||||
|
// System.out.println("# nBins: " + nBins);
|
||||||
|
for (int j1 = 0; j1 < nBins; j1++) {
|
||||||
|
// Skip bin #
|
||||||
|
skipBytes(4);
|
||||||
|
final int nChunks = readInteger();
|
||||||
|
// Skip chunks
|
||||||
|
skipBytes(16 * nChunks);
|
||||||
|
}
|
||||||
|
final int nLinearBins = readInteger();
|
||||||
|
if (nLinearBins > 0) {
|
||||||
|
// Skip to last element of list of linear bins
|
||||||
|
skipBytes(8 * (nLinearBins - 1));
|
||||||
|
lastLinearIndexPointer = readLongs(1)[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
closeIndexFile();
|
||||||
|
|
||||||
|
return lastLinearIndexPointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void skipToSequence(final int referenceSequence) {
|
||||||
|
// Find the offset in the file of the last sequence whose position has been determined. Start here
|
||||||
|
// when searching the sequence for the next value to read. (Note that sequenceStartCache[0] will always
|
||||||
|
// be present, so no extra stopping condition is necessary.
|
||||||
|
int sequenceIndex = referenceSequence;
|
||||||
|
while(sequenceStartCache[sequenceIndex] == -1)
|
||||||
|
sequenceIndex--;
|
||||||
|
|
||||||
|
// Advance to the most recently found position.
|
||||||
|
seek(sequenceStartCache[sequenceIndex]);
|
||||||
|
|
||||||
|
for (int i = sequenceIndex; i < referenceSequence; i++) {
|
||||||
|
sequenceStartCache[i] = position();
|
||||||
|
// System.out.println("# Sequence TID: " + i);
|
||||||
|
final int nBins = readInteger();
|
||||||
|
// System.out.println("# nBins: " + nBins);
|
||||||
|
for (int j = 0; j < nBins; j++) {
|
||||||
|
/* final int bin = */
|
||||||
|
readInteger();
|
||||||
|
final int nChunks = readInteger();
|
||||||
|
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
||||||
|
skipBytes(16 * nChunks);
|
||||||
|
}
|
||||||
|
final int nLinearBins = readInteger();
|
||||||
|
// System.out.println("# nLinearBins: " + nLinearBins);
|
||||||
|
skipBytes(8 * nLinearBins);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
sequenceStartCache[referenceSequence] = position();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private void openIndexFile() {
|
||||||
|
try {
|
||||||
|
fileStream = new SeekableFileStream(mFile);
|
||||||
|
baiStream = SamIndexes.asBaiSeekableStreamOrNull(fileStream, sequenceDictionary);
|
||||||
|
bufferedStream = new SeekableBufferedStream(baiStream, BUFFERED_STREAM_BUFFER_SIZE);
|
||||||
|
fileLength=bufferedStream.length();
|
||||||
|
}
|
||||||
|
catch (IOException exc) {
|
||||||
|
throw new ReviewedGATKException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void closeIndexFile() {
|
||||||
|
try {
|
||||||
|
bufferedStream.close();
|
||||||
|
baiStream.close();
|
||||||
|
fileStream.close();
|
||||||
|
fileLength = -1;
|
||||||
|
}
|
||||||
|
catch (IOException exc) {
|
||||||
|
throw new ReviewedGATKException("Unable to close index file " + mFile, exc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8;
|
||||||
|
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
|
||||||
|
|
||||||
|
private byte[] readBytes(int count) {
|
||||||
|
final ByteBuffer buffer = getBuffer(count);
|
||||||
|
read(buffer);
|
||||||
|
buffer.flip();
|
||||||
|
byte[] contents = new byte[count];
|
||||||
|
buffer.get(contents);
|
||||||
|
return contents;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int readInteger() {
|
||||||
|
final ByteBuffer buffer = getBuffer(INT_SIZE_IN_BYTES);
|
||||||
|
read(buffer);
|
||||||
|
buffer.flip();
|
||||||
|
return buffer.getInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads an array of <count> longs from the file channel, returning the results as an array.
|
||||||
|
* @param count Number of longs to read.
|
||||||
|
* @return An array of longs. Size of array should match count.
|
||||||
|
*/
|
||||||
|
private long[] readLongs(final int count) {
|
||||||
|
final ByteBuffer buffer = getBuffer(count*LONG_SIZE_IN_BYTES);
|
||||||
|
read(buffer);
|
||||||
|
buffer.flip();
|
||||||
|
long[] result = new long[count];
|
||||||
|
for(int i = 0; i < count; i++)
|
||||||
|
result[i] = buffer.getLong();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void read(final ByteBuffer buffer) {
|
||||||
|
final int bytesRequested = buffer.limit();
|
||||||
|
if (bytesRequested == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
|
//BufferedInputStream cannot read directly into a byte buffer, so we read into an array
|
||||||
|
//and put the result into the bytebuffer after the if statement.
|
||||||
|
|
||||||
|
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
||||||
|
// our buffer to -- if there isn't enough data in the file, the index
|
||||||
|
// must be truncated or otherwise corrupt:
|
||||||
|
if(bytesRequested > fileLength - bufferedStream.position()){
|
||||||
|
throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " +
|
||||||
|
"It's likely that this file is truncated or corrupt -- " +
|
||||||
|
"Please try re-indexing the corresponding BAM file.",
|
||||||
|
mFile));
|
||||||
|
}
|
||||||
|
|
||||||
|
int bytesRead = bufferedStream.read(byteArray, 0, bytesRequested);
|
||||||
|
|
||||||
|
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
||||||
|
// our buffer to -- if we encounter EOF (-1), the index
|
||||||
|
// must be truncated or otherwise corrupt:
|
||||||
|
if (bytesRead <= 0) {
|
||||||
|
throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " +
|
||||||
|
"It's likely that this file is truncated or corrupt -- " +
|
||||||
|
"Please try re-indexing the corresponding BAM file.",
|
||||||
|
mFile));
|
||||||
|
}
|
||||||
|
|
||||||
|
if(bytesRead != bytesRequested)
|
||||||
|
throw new RuntimeException("Read amount different from requested amount. This should not happen.");
|
||||||
|
|
||||||
|
buffer.put(byteArray, 0, bytesRequested);
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new ReviewedGATKException("Index: unable to read bytes from index file " + mFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A reusable buffer for use by this index generator.
|
||||||
|
* TODO: Should this be a SoftReference?
|
||||||
|
*/
|
||||||
|
private ByteBuffer buffer = null;
|
||||||
|
|
||||||
|
//BufferedStream don't read into ByteBuffers, so we need this temporary array
|
||||||
|
private byte[] byteArray=null;
|
||||||
|
private ByteBuffer getBuffer(final int size) {
|
||||||
|
if(buffer == null || buffer.capacity() < size) {
|
||||||
|
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
|
||||||
|
buffer = ByteBuffer.allocate(size);
|
||||||
|
byteArray = new byte[size];
|
||||||
|
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||||
|
}
|
||||||
|
buffer.clear();
|
||||||
|
buffer.limit(size);
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void skipBytes(final int count) {
|
||||||
|
try {
|
||||||
|
|
||||||
|
//try to skip forward the requested amount.
|
||||||
|
final long skipped = bufferedStream.skip(count);
|
||||||
|
|
||||||
|
if( skipped != count ) { //if not managed to skip the requested amount
|
||||||
|
throw new ReviewedGATKException("Index: unable to reposition file channel of index file " + mFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new ReviewedGATKException("Index: unable to reposition file channel of index file " + mFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void seek(final long position) {
|
||||||
|
try {
|
||||||
|
//to seek a new position, move the fileChannel, and reposition the bufferedStream
|
||||||
|
bufferedStream.seek(position);
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new ReviewedGATKException("Index: unable to reposition of file channel of index file " + mFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve the position from the current file channel.
|
||||||
|
* @return position of the current file channel.
|
||||||
|
*/
|
||||||
|
private long position() {
|
||||||
|
try {
|
||||||
|
return bufferedStream.position();
|
||||||
|
}
|
||||||
|
catch (IOException exc) {
|
||||||
|
throw new ReviewedGATKException("Unable to read position from index file " + mFile, exc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -28,7 +28,10 @@ package org.broadinstitute.gatk.engine.datasources.reads;
|
||||||
import htsjdk.samtools.MergingSamRecordIterator;
|
import htsjdk.samtools.MergingSamRecordIterator;
|
||||||
import htsjdk.samtools.SamFileHeaderMerger;
|
import htsjdk.samtools.SamFileHeaderMerger;
|
||||||
import htsjdk.samtools.*;
|
import htsjdk.samtools.*;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
|
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
|
||||||
|
import htsjdk.samtools.sra.SRAAccession;
|
||||||
|
import htsjdk.samtools.sra.SRAIndexedSequenceFile;
|
||||||
import htsjdk.samtools.util.CloseableIterator;
|
import htsjdk.samtools.util.CloseableIterator;
|
||||||
import htsjdk.samtools.util.CloserUtil;
|
import htsjdk.samtools.util.CloserUtil;
|
||||||
import htsjdk.samtools.util.RuntimeIOException;
|
import htsjdk.samtools.util.RuntimeIOException;
|
||||||
|
|
@ -51,7 +54,6 @@ import org.broadinstitute.gatk.utils.interval.IntervalMergingRule;
|
||||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
||||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIteratorAdapter;
|
import org.broadinstitute.gatk.utils.iterators.GATKSAMIteratorAdapter;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
|
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator;
|
import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator;
|
||||||
import org.broadinstitute.gatk.utils.sam.SAMReaderID;
|
import org.broadinstitute.gatk.utils.sam.SAMReaderID;
|
||||||
|
|
||||||
|
|
@ -310,7 +312,7 @@ public class SAMDataSource {
|
||||||
|
|
||||||
// Determine the sort order.
|
// Determine the sort order.
|
||||||
for(SAMReaderID readerID: readerIDs) {
|
for(SAMReaderID readerID: readerIDs) {
|
||||||
if (! readerID.getSamFile().canRead() )
|
if (!SRAAccession.isValid(readerID.getSamFile().getPath()) && !readerID.getSamFile().canRead() )
|
||||||
throw new UserException.CouldNotReadInputFile(readerID.getSamFile(),"file is not present or user does not have appropriate permissions. " +
|
throw new UserException.CouldNotReadInputFile(readerID.getSamFile(),"file is not present or user does not have appropriate permissions. " +
|
||||||
"Please check that the file is present and readable and try again.");
|
"Please check that the file is present and readable and try again.");
|
||||||
|
|
||||||
|
|
@ -377,15 +379,28 @@ public class SAMDataSource {
|
||||||
if (referenceFile == null) {
|
if (referenceFile == null) {
|
||||||
samSequenceDictionary = mergedHeader.getSequenceDictionary();
|
samSequenceDictionary = mergedHeader.getSequenceDictionary();
|
||||||
} else {
|
} else {
|
||||||
samSequenceDictionary = ReferenceSequenceFileFactory.
|
ReferenceSequenceFile ref;
|
||||||
getReferenceSequenceFile(referenceFile).
|
// maybe it is SRA file?
|
||||||
getSequenceDictionary();
|
if (SRAAccession.isValid(referenceFile.getPath())) {
|
||||||
|
ref = new SRAIndexedSequenceFile(new SRAAccession(referenceFile.getPath()));
|
||||||
|
} else {
|
||||||
|
ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile);
|
||||||
|
}
|
||||||
|
samSequenceDictionary = ref.getSequenceDictionary();
|
||||||
}
|
}
|
||||||
|
|
||||||
for(SAMReaderID id: readerIDs) {
|
for(SAMReaderID id: readerIDs) {
|
||||||
File indexFile = findIndexFile(id.getSamFile());
|
File indexFile = findIndexFile(id.getSamFile());
|
||||||
if(indexFile != null)
|
if(indexFile != null) {
|
||||||
bamIndices.put(id,new GATKBAMIndex(indexFile, samSequenceDictionary));
|
bamIndices.put(id, new GATKBAMIndexFromFile(indexFile, samSequenceDictionary));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if index file is not found, try SamReader indexing interface
|
||||||
|
SamReader reader = readers.getReader(id);
|
||||||
|
if (reader.indexing().hasBrowseableIndex()) {
|
||||||
|
bamIndices.put(id, new GATKBAMIndexFromDataSource(id.getSamFile(), reader.getFileHeader(), reader.indexing().getBrowseableIndex()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
resourcePool.releaseReaders(readers);
|
resourcePool.releaseReaders(readers);
|
||||||
|
|
@ -1122,12 +1137,17 @@ public class SAMDataSource {
|
||||||
try {
|
try {
|
||||||
if (threadAllocation.getNumIOThreads() > 0)
|
if (threadAllocation.getNumIOThreads() > 0)
|
||||||
blockInputStream = new BlockInputStream(dispatcher,readerID,false);
|
blockInputStream = new BlockInputStream(dispatcher,readerID,false);
|
||||||
reader = SamReaderFactory.makeDefault()
|
SamReaderFactory factory = SamReaderFactory.makeDefault()
|
||||||
.referenceSequence(referenceFile)
|
|
||||||
.validationStringency(validationStringency)
|
.validationStringency(validationStringency)
|
||||||
.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
|
.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false);
|
||||||
|
|
||||||
|
if (SRAAccession.isValid(readerID.getSamFile().getPath())) {
|
||||||
|
reader = factory.open(SamInputResource.of(new SRAAccession(readerID.getSamFile().getPath())));
|
||||||
|
} else {
|
||||||
|
reader = factory.referenceSequence(referenceFile)
|
||||||
.setOption(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, true)
|
.setOption(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, true)
|
||||||
.open(readerID.getSamFile());
|
.open(readerID.getSamFile());
|
||||||
|
}
|
||||||
|
|
||||||
} catch ( RuntimeIOException e ) {
|
} catch ( RuntimeIOException e ) {
|
||||||
throw new UserException.CouldNotReadInputFile(readerID.getSamFile(), e);
|
throw new UserException.CouldNotReadInputFile(readerID.getSamFile(), e);
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.gatk.engine.datasources.reads.utilities;
|
package org.broadinstitute.gatk.engine.datasources.reads.utilities;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
|
import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Input;
|
import org.broadinstitute.gatk.utils.commandline.Input;
|
||||||
|
|
@ -88,12 +89,12 @@ public class FindLargeShards extends CommandLineProgram {
|
||||||
@Override
|
@Override
|
||||||
public int execute() throws IOException {
|
public int execute() throws IOException {
|
||||||
// initialize reference
|
// initialize reference
|
||||||
IndexedFastaSequenceFile refReader = new IndexedFastaSequenceFile(referenceFile);
|
final ReferenceSequenceFile refReader = new IndexedFastaSequenceFile(referenceFile);
|
||||||
GenomeLocParser genomeLocParser = new GenomeLocParser(refReader);
|
final GenomeLocParser genomeLocParser = new GenomeLocParser(refReader);
|
||||||
|
|
||||||
// initialize reads
|
// initialize reads
|
||||||
List<SAMReaderID> bamReaders = ListFileUtils.unpackBAMFileList(samFiles,parser);
|
List<SAMReaderID> bamReaders = ListFileUtils.unpackBAMFileList(samFiles,parser);
|
||||||
SAMDataSource dataSource = new SAMDataSource(referenceFile, bamReaders, new ThreadAllocation(), null, genomeLocParser);
|
final SAMDataSource dataSource = new SAMDataSource(referenceFile, bamReaders, new ThreadAllocation(), null, genomeLocParser);
|
||||||
|
|
||||||
// intervals
|
// intervals
|
||||||
final GenomeLocSortedSet intervalSortedSet;
|
final GenomeLocSortedSet intervalSortedSet;
|
||||||
|
|
@ -124,12 +125,12 @@ public class FindLargeShards extends CommandLineProgram {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print out the stddev: (sum(x^2) - (1/N)*sum(x)^2)/N
|
// Print out the stddev: (sum(x^2) - (1/N)*sum(x)^2)/N
|
||||||
long mean = sum.divide(BigInteger.valueOf(numberOfShards)).longValue();
|
final long mean = sum.divide(BigInteger.valueOf(numberOfShards)).longValue();
|
||||||
long stddev = (long)(Math.sqrt(sumOfSquares.subtract(sum.pow(2).divide(BigInteger.valueOf(numberOfShards))).divide(BigInteger.valueOf(numberOfShards)).doubleValue()));
|
final long stddev = (long)(Math.sqrt(sumOfSquares.subtract(sum.pow(2).divide(BigInteger.valueOf(numberOfShards))).divide(BigInteger.valueOf(numberOfShards)).doubleValue()));
|
||||||
logger.info(String.format("Number of shards: %d; mean uncompressed size = %d; stddev uncompressed size = %d%n",numberOfShards,mean,stddev));
|
logger.info(String.format("Number of shards: %d; mean uncompressed size = %d; stddev uncompressed size = %d%n",numberOfShards,mean,stddev));
|
||||||
|
|
||||||
// Crank through the shards again, this time reporting on the shards significantly larger than the mean.
|
// Crank through the shards again, this time reporting on the shards significantly larger than the mean.
|
||||||
long threshold = mean + stddev*5;
|
final long threshold = mean + stddev*5;
|
||||||
logger.warn(String.format("PROGRESS: Searching for large shards: Contig\tRegion.Start\tRegion.Stop\tSize"));
|
logger.warn(String.format("PROGRESS: Searching for large shards: Contig\tRegion.Start\tRegion.Stop\tSize"));
|
||||||
out.printf("Contig\tRegion.Start\tRegion.Stop\tSize%n");
|
out.printf("Contig\tRegion.Start\tRegion.Stop\tSize%n");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.datasources.reference;
|
package org.broadinstitute.gatk.engine.datasources.reference;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.SAMSequenceRecord;
|
import htsjdk.samtools.SAMSequenceRecord;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.engine.datasources.reads.LocusShard;
|
import org.broadinstitute.gatk.engine.datasources.reads.LocusShard;
|
||||||
import org.broadinstitute.gatk.engine.datasources.reads.SAMDataSource;
|
import org.broadinstitute.gatk.engine.datasources.reads.SAMDataSource;
|
||||||
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
||||||
|
|
@ -46,7 +46,7 @@ import java.util.List;
|
||||||
* Looks for fai and dict files, and tries to create them if they don't exist
|
* Looks for fai and dict files, and tries to create them if they don't exist
|
||||||
*/
|
*/
|
||||||
public class ReferenceDataSource {
|
public class ReferenceDataSource {
|
||||||
private IndexedFastaSequenceFile reference;
|
private ReferenceSequenceFile reference;
|
||||||
|
|
||||||
/** our log, which we want to capture anything from this class */
|
/** our log, which we want to capture anything from this class */
|
||||||
protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
|
protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
|
||||||
|
|
@ -55,7 +55,7 @@ public class ReferenceDataSource {
|
||||||
* Create reference data source from fasta file
|
* Create reference data source from fasta file
|
||||||
* @param fastaFile Fasta file to be used as reference
|
* @param fastaFile Fasta file to be used as reference
|
||||||
*/
|
*/
|
||||||
public ReferenceDataSource(File fastaFile) {
|
public ReferenceDataSource(final File fastaFile) {
|
||||||
reference = CachingIndexedFastaSequenceFile.checkAndCreate(fastaFile);
|
reference = CachingIndexedFastaSequenceFile.checkAndCreate(fastaFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -63,7 +63,7 @@ public class ReferenceDataSource {
|
||||||
* Get indexed fasta file
|
* Get indexed fasta file
|
||||||
* @return IndexedFastaSequenceFile that was created from file
|
* @return IndexedFastaSequenceFile that was created from file
|
||||||
*/
|
*/
|
||||||
public IndexedFastaSequenceFile getReference() {
|
public ReferenceSequenceFile getReference() {
|
||||||
return this.reference;
|
return this.reference;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.executive;
|
package org.broadinstitute.gatk.engine.executive;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.tribble.TribbleException;
|
import htsjdk.tribble.TribbleException;
|
||||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.gatk.engine.datasources.reads.SAMDataSource;
|
import org.broadinstitute.gatk.engine.datasources.reads.SAMDataSource;
|
||||||
|
|
@ -111,7 +111,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
protected HierarchicalMicroScheduler(final GenomeAnalysisEngine engine,
|
protected HierarchicalMicroScheduler(final GenomeAnalysisEngine engine,
|
||||||
final Walker walker,
|
final Walker walker,
|
||||||
final SAMDataSource reads,
|
final SAMDataSource reads,
|
||||||
final IndexedFastaSequenceFile reference,
|
final ReferenceSequenceFile reference,
|
||||||
final Collection<ReferenceOrderedDataSource> rods,
|
final Collection<ReferenceOrderedDataSource> rods,
|
||||||
final ThreadAllocation threadAllocation) {
|
final ThreadAllocation threadAllocation) {
|
||||||
super(engine, walker, reads, reference, rods, threadAllocation);
|
super(engine, walker, reads, reference, rods, threadAllocation);
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.executive;
|
package org.broadinstitute.gatk.engine.executive;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.gatk.engine.datasources.providers.LocusShardDataProvider;
|
import org.broadinstitute.gatk.engine.datasources.providers.LocusShardDataProvider;
|
||||||
import org.broadinstitute.gatk.engine.datasources.providers.ReadShardDataProvider;
|
import org.broadinstitute.gatk.engine.datasources.providers.ReadShardDataProvider;
|
||||||
|
|
@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
||||||
protected LinearMicroScheduler(final GenomeAnalysisEngine engine,
|
protected LinearMicroScheduler(final GenomeAnalysisEngine engine,
|
||||||
final Walker walker,
|
final Walker walker,
|
||||||
final SAMDataSource reads,
|
final SAMDataSource reads,
|
||||||
final IndexedFastaSequenceFile reference,
|
final ReferenceSequenceFile reference,
|
||||||
final Collection<ReferenceOrderedDataSource> rods,
|
final Collection<ReferenceOrderedDataSource> rods,
|
||||||
final ThreadAllocation threadAllocation) {
|
final ThreadAllocation threadAllocation) {
|
||||||
super(engine, walker, reads, reference, rods, threadAllocation);
|
super(engine, walker, reads, reference, rods, threadAllocation);
|
||||||
|
|
@ -84,7 +84,6 @@ public class LinearMicroScheduler extends MicroScheduler {
|
||||||
Accumulator accumulator = Accumulator.create(engine,walker);
|
Accumulator accumulator = Accumulator.create(engine,walker);
|
||||||
|
|
||||||
boolean done = walker.isDone();
|
boolean done = walker.isDone();
|
||||||
int counter = 0;
|
|
||||||
|
|
||||||
final TraversalEngine traversalEngine = borrowTraversalEngine(this);
|
final TraversalEngine traversalEngine = borrowTraversalEngine(this);
|
||||||
for (Shard shard : shardStrategy ) {
|
for (Shard shard : shardStrategy ) {
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
package org.broadinstitute.gatk.engine.executive;
|
package org.broadinstitute.gatk.engine.executive;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.gatk.engine.ReadMetrics;
|
import org.broadinstitute.gatk.engine.ReadMetrics;
|
||||||
|
|
@ -104,7 +104,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
||||||
*/
|
*/
|
||||||
protected final GenomeAnalysisEngine engine;
|
protected final GenomeAnalysisEngine engine;
|
||||||
|
|
||||||
protected final IndexedFastaSequenceFile reference;
|
protected final ReferenceSequenceFile reference;
|
||||||
|
|
||||||
private final SAMDataSource reads;
|
private final SAMDataSource reads;
|
||||||
protected final Collection<ReferenceOrderedDataSource> rods;
|
protected final Collection<ReferenceOrderedDataSource> rods;
|
||||||
|
|
@ -131,7 +131,8 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
||||||
*
|
*
|
||||||
* @return The best-fit microscheduler.
|
* @return The best-fit microscheduler.
|
||||||
*/
|
*/
|
||||||
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, ThreadAllocation threadAllocation) {
|
public static MicroScheduler create(final GenomeAnalysisEngine engine, final Walker walker, final SAMDataSource reads, final ReferenceSequenceFile reference,
|
||||||
|
final Collection<ReferenceOrderedDataSource> rods, final ThreadAllocation threadAllocation) {
|
||||||
if ( threadAllocation.isRunningInParallelMode() ) {
|
if ( threadAllocation.isRunningInParallelMode() ) {
|
||||||
logger.info(String.format("Running the GATK in parallel mode with %d total threads, " +
|
logger.info(String.format("Running the GATK in parallel mode with %d total threads, " +
|
||||||
"%d CPU thread(s) for each of %d data thread(s), of %d processors available on this machine",
|
"%d CPU thread(s) for each of %d data thread(s), of %d processors available on this machine",
|
||||||
|
|
@ -183,7 +184,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
||||||
protected MicroScheduler(final GenomeAnalysisEngine engine,
|
protected MicroScheduler(final GenomeAnalysisEngine engine,
|
||||||
final Walker walker,
|
final Walker walker,
|
||||||
final SAMDataSource reads,
|
final SAMDataSource reads,
|
||||||
final IndexedFastaSequenceFile reference,
|
final ReferenceSequenceFile reference,
|
||||||
final Collection<ReferenceOrderedDataSource> rods,
|
final Collection<ReferenceOrderedDataSource> rods,
|
||||||
final ThreadAllocation threadAllocation) {
|
final ThreadAllocation threadAllocation) {
|
||||||
this.engine = engine;
|
this.engine = engine;
|
||||||
|
|
@ -397,7 +398,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
||||||
* Returns the reference maintained by this scheduler.
|
* Returns the reference maintained by this scheduler.
|
||||||
* @return The reference maintained by this scheduler.
|
* @return The reference maintained by this scheduler.
|
||||||
*/
|
*/
|
||||||
public IndexedFastaSequenceFile getReference() { return reference; }
|
public ReferenceSequenceFile getReference() { return reference; }
|
||||||
|
|
||||||
protected void cleanup() {
|
protected void cleanup() {
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.filters;
|
package org.broadinstitute.gatk.engine.filters;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.gatk.engine.WalkerManager;
|
import org.broadinstitute.gatk.engine.WalkerManager;
|
||||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||||
|
|
@ -41,7 +41,7 @@ import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||||
*/
|
*/
|
||||||
public class BAQReadTransformer extends ReadTransformer {
|
public class BAQReadTransformer extends ReadTransformer {
|
||||||
private BAQ baqHMM;
|
private BAQ baqHMM;
|
||||||
private IndexedFastaSequenceFile refReader;
|
private ReferenceSequenceFile refReader;
|
||||||
private BAQ.CalculationMode cmode;
|
private BAQ.CalculationMode cmode;
|
||||||
private BAQ.QualityMode qmode;
|
private BAQ.QualityMode qmode;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.gatk.engine.walkers;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.tribble.Feature;
|
import htsjdk.tribble.Feature;
|
||||||
import org.broadinstitute.gatk.utils.commandline.*;
|
import org.broadinstitute.gatk.utils.commandline.*;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
|
|
@ -181,7 +182,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
||||||
// Map over the ActiveRegion
|
// Map over the ActiveRegion
|
||||||
public abstract MapType map(final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
|
public abstract MapType map(final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
|
||||||
|
|
||||||
public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, IndexedFastaSequenceFile reference ) {
|
public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, final ReferenceSequenceFile reference ) {
|
||||||
final int activeRegionExtension = this.getClass().getAnnotation(ActiveRegionTraversalParameters.class).extension();
|
final int activeRegionExtension = this.getClass().getAnnotation(ActiveRegionTraversalParameters.class).extension();
|
||||||
final List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
|
final List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
|
||||||
for( final GenomeLoc interval : intervals.toList() ) {
|
for( final GenomeLoc interval : intervals.toList() ) {
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine;
|
package org.broadinstitute.gatk.engine;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.*;
|
import htsjdk.samtools.*;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||||
import org.broadinstitute.gatk.utils.ValidationExclusion;
|
import org.broadinstitute.gatk.utils.ValidationExclusion;
|
||||||
|
|
@ -82,7 +82,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
||||||
// Test the accuracy of the read metrics
|
// Test the accuracy of the read metrics
|
||||||
|
|
||||||
private File referenceFile;
|
private File referenceFile;
|
||||||
private IndexedFastaSequenceFile reference;
|
private ReferenceSequenceFile reference;
|
||||||
private SAMSequenceDictionary dictionary;
|
private SAMSequenceDictionary dictionary;
|
||||||
private SAMFileHeader header;
|
private SAMFileHeader header;
|
||||||
private GATKSAMReadGroupRecord readGroup;
|
private GATKSAMReadGroupRecord readGroup;
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.datasources.providers;
|
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||||
|
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.tribble.Feature;
|
import htsjdk.tribble.Feature;
|
||||||
import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||||
|
|
@ -71,7 +72,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
||||||
/**
|
/**
|
||||||
* Sequence file.
|
* Sequence file.
|
||||||
*/
|
*/
|
||||||
private static IndexedFastaSequenceFile seq;
|
private static ReferenceSequenceFile seq;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
package org.broadinstitute.gatk.engine.datasources.providers;
|
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||||
|
|
||||||
import htsjdk.samtools.SAMSequenceRecord;
|
import htsjdk.samtools.SAMSequenceRecord;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
|
|
@ -57,7 +57,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
|
||||||
/**
|
/**
|
||||||
* The fasta, for comparison.
|
* The fasta, for comparison.
|
||||||
*/
|
*/
|
||||||
protected IndexedFastaSequenceFile sequenceFile = null;
|
protected ReferenceSequenceFile sequenceFile = null;
|
||||||
protected GenomeLocParser genomeLocParser = null;
|
protected GenomeLocParser genomeLocParser = null;
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -25,9 +25,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.datasources.reads;
|
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.GATKBAMFileSpan;
|
import htsjdk.samtools.GATKBAMFileSpan;
|
||||||
import htsjdk.samtools.GATKChunk;
|
import htsjdk.samtools.GATKChunk;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
|
|
@ -45,7 +45,7 @@ import java.io.FileNotFoundException;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class FilePointerUnitTest extends BaseTest {
|
public class FilePointerUnitTest extends BaseTest {
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
private SAMReaderID readerID = new SAMReaderID("samFile",new Tags());
|
private SAMReaderID readerID = new SAMReaderID("samFile",new Tags());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,98 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2012-2016 Broad Institute, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||||
|
|
||||||
|
import htsjdk.samtools.SAMFileReader;
|
||||||
|
import htsjdk.samtools.SamReader;
|
||||||
|
import htsjdk.samtools.SamReaderFactory;
|
||||||
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
|
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test basic functionality in the GATK's wrapper around htsjdk.samtools.BrowseableBAMIndex.
|
||||||
|
*/
|
||||||
|
public class GATKBAMIndexFromDataSourceUnitTest extends BaseTest {
|
||||||
|
private static File bamFile = new File(validationDataLocation+"MV1994.selected.bam");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Storage for the index itself.
|
||||||
|
*/
|
||||||
|
private GATKBAMIndex bamIndex;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public void init() throws IOException {
|
||||||
|
final SAMFileReader reader = new SAMFileReader(bamFile);
|
||||||
|
reader.enableIndexCaching(true); // needed ot get BrowseableBAMIndex
|
||||||
|
Assert.assertTrue(reader.hasIndex());
|
||||||
|
Assert.assertTrue(reader.indexing().hasBrowseableIndex());
|
||||||
|
|
||||||
|
bamIndex = new GATKBAMIndexFromDataSource(bamFile, reader.getFileHeader(), reader.indexing().getBrowseableIndex());
|
||||||
|
reader.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNumberAndSizeOfIndexLevels() {
|
||||||
|
// The correct values for this test are pulled directly from the
|
||||||
|
// SAM Format Specification v1.3-r882, Section 4.1.1, last paragraph.
|
||||||
|
Assert.assertEquals(GATKBAMIndex.getNumIndexLevels(),6,"Incorrect number of levels in BAM index");
|
||||||
|
|
||||||
|
// Level 0
|
||||||
|
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(0),0);
|
||||||
|
Assert.assertEquals(bamIndex.getLevelSize(0),1);
|
||||||
|
|
||||||
|
// Level 1
|
||||||
|
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(1),1);
|
||||||
|
Assert.assertEquals(bamIndex.getLevelSize(1),8-1+1);
|
||||||
|
|
||||||
|
// Level 2
|
||||||
|
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(2),9);
|
||||||
|
Assert.assertEquals(bamIndex.getLevelSize(2),72-9+1);
|
||||||
|
|
||||||
|
// Level 3
|
||||||
|
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(3),73);
|
||||||
|
Assert.assertEquals(bamIndex.getLevelSize(3),584-73+1);
|
||||||
|
|
||||||
|
// Level 4
|
||||||
|
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(4),585);
|
||||||
|
Assert.assertEquals(bamIndex.getLevelSize(4),4680-585+1);
|
||||||
|
|
||||||
|
// Level 5
|
||||||
|
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(5),4681);
|
||||||
|
// Need to wait unitl AbstractBAMFileIndex.getLevelSize() is fixed, will throw an ArrayIndexOutOfBoundsException if 1 before the end of the GenomicIndexUtil.LEVEL_STARTS array
|
||||||
|
//Assert.assertEquals(bamIndex.getLevelSize(5),37448-4681+1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test( expectedExceptions = ReviewedGATKException.class )
|
||||||
|
public void testTooManyReferenceSequencesDataSource() {
|
||||||
|
final GATKBAMIndexData index = bamIndex.readReferenceSequence(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -39,7 +39,7 @@ import java.io.FileNotFoundException;
|
||||||
/**
|
/**
|
||||||
* Test basic functionality in the GATK's implementation of the BAM index classes.
|
* Test basic functionality in the GATK's implementation of the BAM index classes.
|
||||||
*/
|
*/
|
||||||
public class GATKBAMIndexUnitTest extends BaseTest {
|
public class GATKBAMIndexFromFileUnitTest extends BaseTest {
|
||||||
private static File bamFile = new File(validationDataLocation+"MV1994.selected.bam");
|
private static File bamFile = new File(validationDataLocation+"MV1994.selected.bam");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -60,11 +60,11 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public void init() throws FileNotFoundException {
|
public void init() throws FileNotFoundException {
|
||||||
SAMFileReader reader = new SAMFileReader(bamFile);
|
final SAMFileReader reader = new SAMFileReader(bamFile);
|
||||||
this.sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
|
sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
||||||
bamIndex = new GATKBAMIndex(bamIndexFile, sequenceDictionary);
|
bamIndex = new GATKBAMIndexFromFile(bamIndexFile, sequenceDictionary);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -100,13 +100,13 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||||
public void testDetectTruncatedBamIndexWordBoundary() {
|
public void testDetectTruncatedBamIndexWordBoundary() {
|
||||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"), sequenceDictionary);
|
final GATKBAMIndex index = new GATKBAMIndexFromFile(new File(privateTestDir + "truncated_at_word_boundary.bai"), sequenceDictionary);
|
||||||
index.readReferenceSequence(0);
|
index.readReferenceSequence(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||||
public void testDetectTruncatedBamIndexNonWordBoundary() {
|
public void testDetectTruncatedBamIndeFromFilexNonWordBoundary() {
|
||||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"), sequenceDictionary);
|
final GATKBAMIndex index = new GATKBAMIndexFromFile(new File(privateTestDir + "truncated_at_non_word_boundary.bai"), sequenceDictionary);
|
||||||
index.readReferenceSequence(0);
|
index.readReferenceSequence(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -25,8 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.datasources.reads;
|
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.*;
|
import htsjdk.samtools.*;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||||
import org.broadinstitute.gatk.utils.ValidationExclusion;
|
import org.broadinstitute.gatk.utils.ValidationExclusion;
|
||||||
|
|
@ -64,7 +64,7 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
||||||
|
|
||||||
private List<SAMReaderID> readers;
|
private List<SAMReaderID> readers;
|
||||||
private File referenceFile;
|
private File referenceFile;
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.datasources.rmd;
|
package org.broadinstitute.gatk.engine.datasources.rmd;
|
||||||
|
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||||
import org.broadinstitute.gatk.utils.refdata.tracks.RMDTrackBuilder;
|
import org.broadinstitute.gatk.utils.refdata.tracks.RMDTrackBuilder;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
|
|
@ -68,7 +69,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
||||||
private RMDTriplet triplet = null;
|
private RMDTriplet triplet = null;
|
||||||
private RMDTrackBuilder builder = null;
|
private RMDTrackBuilder builder = null;
|
||||||
|
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
private GenomeLoc testSite1;
|
private GenomeLoc testSite1;
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.datasources.rmd;
|
package org.broadinstitute.gatk.engine.datasources.rmd;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.tribble.Feature;
|
import htsjdk.tribble.Feature;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||||
|
|
@ -46,7 +46,7 @@ public class ReferenceOrderedQueryDataPoolUnitTest extends BaseTest{
|
||||||
// Build up query parameters
|
// Build up query parameters
|
||||||
File file = new File(BaseTest.privateTestDir + "NA12878.hg19.example1.vcf");
|
File file = new File(BaseTest.privateTestDir + "NA12878.hg19.example1.vcf");
|
||||||
RMDTriplet triplet = new RMDTriplet("test", "VCF", file.getAbsolutePath(), RMDTriplet.RMDStorageType.FILE, new Tags());
|
RMDTriplet triplet = new RMDTriplet("test", "VCF", file.getAbsolutePath(), RMDTriplet.RMDStorageType.FILE, new Tags());
|
||||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||||
GenomeLocParser parser = new GenomeLocParser(seq);
|
GenomeLocParser parser = new GenomeLocParser(seq);
|
||||||
GenomeLoc loc = parser.createGenomeLoc("20", 1, 100000);
|
GenomeLoc loc = parser.createGenomeLoc("20", 1, 100000);
|
||||||
TestRMDTrackBuilder builder = new TestRMDTrackBuilder(seq.getSequenceDictionary(), parser);
|
TestRMDTrackBuilder builder = new TestRMDTrackBuilder(seq.getSequenceDictionary(), parser);
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.traversals;
|
package org.broadinstitute.gatk.engine.traversals;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.sam.ArtificialBAMBuilder;
|
import org.broadinstitute.gatk.utils.sam.ArtificialBAMBuilder;
|
||||||
|
|
@ -43,7 +43,7 @@ import java.util.List;
|
||||||
|
|
||||||
public class TAROrderedReadCacheUnitTest extends BaseTest {
|
public class TAROrderedReadCacheUnitTest extends BaseTest {
|
||||||
// example fasta index file, can be deleted if you don't use the reference
|
// example fasta index file, can be deleted if you don't use the reference
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public void setup() throws FileNotFoundException {
|
public void setup() throws FileNotFoundException {
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.gatk.engine.traversals;
|
||||||
|
|
||||||
import com.google.java.contract.PreconditionError;
|
import com.google.java.contract.PreconditionError;
|
||||||
import htsjdk.samtools.*;
|
import htsjdk.samtools.*;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||||
import org.broadinstitute.gatk.utils.ValidationExclusion;
|
import org.broadinstitute.gatk.utils.ValidationExclusion;
|
||||||
import org.broadinstitute.gatk.engine.datasources.reads.*;
|
import org.broadinstitute.gatk.engine.datasources.reads.*;
|
||||||
|
|
@ -39,7 +40,6 @@ import org.broadinstitute.gatk.utils.activeregion.ActiveRegionReadState;
|
||||||
import org.broadinstitute.gatk.utils.interval.IntervalMergingRule;
|
import org.broadinstitute.gatk.utils.interval.IntervalMergingRule;
|
||||||
import org.broadinstitute.gatk.utils.interval.IntervalUtils;
|
import org.broadinstitute.gatk.utils.interval.IntervalUtils;
|
||||||
import org.broadinstitute.gatk.utils.sam.*;
|
import org.broadinstitute.gatk.utils.sam.*;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.gatk.engine.datasources.providers.LocusShardDataProvider;
|
import org.broadinstitute.gatk.engine.datasources.providers.LocusShardDataProvider;
|
||||||
|
|
@ -80,7 +80,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private File referenceFile;
|
private File referenceFile;
|
||||||
private IndexedFastaSequenceFile reference;
|
private ReferenceSequenceFile reference;
|
||||||
private SAMSequenceDictionary dictionary;
|
private SAMSequenceDictionary dictionary;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,6 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.engine.traversals;
|
package org.broadinstitute.gatk.engine.traversals;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.engine.walkers.TestCountReadsWalker;
|
import org.broadinstitute.gatk.engine.walkers.TestCountReadsWalker;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
|
|
@ -90,7 +89,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
||||||
private File output;
|
private File output;
|
||||||
private TraverseReadsNano traversalEngine = null;
|
private TraverseReadsNano traversalEngine = null;
|
||||||
|
|
||||||
private IndexedFastaSequenceFile ref = null;
|
private ReferenceSequenceFile ref = null;
|
||||||
private GenomeLocParser genomeLocParser = null;
|
private GenomeLocParser genomeLocParser = null;
|
||||||
private GenomeAnalysisEngine engine = null;
|
private GenomeAnalysisEngine engine = null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -418,6 +418,7 @@
|
||||||
<queuetest.run>${gatk.queuetests.run}</queuetest.run>
|
<queuetest.run>${gatk.queuetests.run}</queuetest.run>
|
||||||
<java.io.tmpdir>${java.io.tmpdir}</java.io.tmpdir>
|
<java.io.tmpdir>${java.io.tmpdir}</java.io.tmpdir>
|
||||||
</systemPropertyVariables>
|
</systemPropertyVariables>
|
||||||
|
<groups>sra</groups>
|
||||||
</configuration>
|
</configuration>
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.qc;
|
package org.broadinstitute.gatk.tools.walkers.qc;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.reference.ReferenceSequence;
|
import htsjdk.samtools.reference.ReferenceSequence;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Output;
|
import org.broadinstitute.gatk.utils.commandline.Output;
|
||||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
|
|
@ -70,7 +70,7 @@ public class QCRef extends RefWalker<Integer, Integer> {
|
||||||
|
|
||||||
String contigName = "";
|
String contigName = "";
|
||||||
int contigStart, contigEnd;
|
int contigStart, contigEnd;
|
||||||
IndexedFastaSequenceFile uncachedRef;
|
ReferenceSequenceFile uncachedRef;
|
||||||
byte[] uncachedBases;
|
byte[] uncachedBases;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.gatk.tools.walkers.varianteval;
|
||||||
|
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.samtools.util.IntervalTree;
|
import htsjdk.samtools.util.IntervalTree;
|
||||||
import htsjdk.samtools.SAMSequenceRecord;
|
import htsjdk.samtools.SAMSequenceRecord;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
@ -282,7 +283,7 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
||||||
private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this);
|
private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this);
|
||||||
|
|
||||||
// Ancestral alignments
|
// Ancestral alignments
|
||||||
private IndexedFastaSequenceFile ancestralAlignments = null;
|
private ReferenceSequenceFile ancestralAlignments = null;
|
||||||
|
|
||||||
// The set of all possible evaluation contexts
|
// The set of all possible evaluation contexts
|
||||||
StratificationManager<VariantStratifier, EvaluationContext> stratManager;
|
StratificationManager<VariantStratifier, EvaluationContext> stratManager;
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.filters;
|
package org.broadinstitute.gatk.tools.walkers.filters;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
|
|
@ -51,7 +51,7 @@ public class VariantFiltrationUnitTest extends BaseTest {
|
||||||
@BeforeTest
|
@BeforeTest
|
||||||
public void before() {
|
public void before() {
|
||||||
// Create GenomeLoc
|
// Create GenomeLoc
|
||||||
IndexedFastaSequenceFile fasta = CachingIndexedFastaSequenceFile.checkAndCreate(new File(privateTestDir + "iupacFASTA.fasta"));
|
ReferenceSequenceFile fasta = CachingIndexedFastaSequenceFile.checkAndCreate(new File(privateTestDir + "iupacFASTA.fasta"));
|
||||||
GenomeLocParser genomeLocParser = new GenomeLocParser(fasta);
|
GenomeLocParser genomeLocParser = new GenomeLocParser(fasta);
|
||||||
chr1 = fasta.getSequenceDictionary().getSequence(0).getSequenceName();
|
chr1 = fasta.getSequenceDictionary().getSequence(0).getSequenceName();
|
||||||
genomeLoc = genomeLocParser.createGenomeLoc(chr1, 5, 10);
|
genomeLoc = genomeLocParser.createGenomeLoc(chr1, 5, 10);
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.indels;
|
package org.broadinstitute.gatk.tools.walkers.indels;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.SAMFileHeader;
|
import htsjdk.samtools.SAMFileHeader;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
||||||
|
|
@ -44,7 +44,7 @@ public class IndelRealignerUnitTest extends BaseTest {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public void setup() throws FileNotFoundException {
|
public void setup() throws FileNotFoundException {
|
||||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||||
header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,143 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2012-2016 Broad Institute, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.gatk.tools.walkers.readutils;
|
||||||
|
|
||||||
|
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||||
|
import org.testng.SkipException;
|
||||||
|
import org.testng.annotations.BeforeGroups;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
import htsjdk.samtools.sra.SRAAccession;
|
||||||
|
|
||||||
|
import java.lang.reflect.Method;
|
||||||
|
|
||||||
|
public class SRAPrintReadsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
|
private static class PRTest {
|
||||||
|
final String accession;
|
||||||
|
final String args;
|
||||||
|
final String md5;
|
||||||
|
|
||||||
|
private PRTest(String accession, String args, String md5) {
|
||||||
|
this.accession = accession;
|
||||||
|
this.args = args;
|
||||||
|
this.md5 = md5;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("PRTest(accession='%s', args='%s')", accession, args);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean canResolveNetworkAccession = false;
|
||||||
|
private static String checkAccession = "SRR000123";
|
||||||
|
private static final String REMOTE_ACCESSION_PATTERN = "^[SED]RR[0-9]{6,9}$";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Are the SRA native libraries loaded and initialized? Does the test accession have a valid name?
|
||||||
|
*/
|
||||||
|
@BeforeGroups(groups = {"sra"})
|
||||||
|
public final void checkIfCanResolve() {
|
||||||
|
// Did SRA successfully load the native libraries and are fully initialized?
|
||||||
|
if (!SRAAccession.isSupported()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Is this is a valid SRA accession?
|
||||||
|
canResolveNetworkAccession = SRAAccession.isValid(checkAccession);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Are the SRA native libraries loaded and initialized?
|
||||||
|
*
|
||||||
|
* @throws SkipException if the SRA native libraries are loaded and initialized
|
||||||
|
*/
|
||||||
|
@BeforeMethod
|
||||||
|
public final void assertSRAIsSupported(){
|
||||||
|
if(!SRAAccession.isSupported()){
|
||||||
|
throw new SkipException("Skipping SRA Test because SRA native code is unavailable.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Skip network SRA Test because cannot resolve remote SRA accession
|
||||||
|
*
|
||||||
|
* @param method Provides information about, and access to, a single method on a class or interface
|
||||||
|
* @param params Method parameters
|
||||||
|
* @throws SkipException if cannot resold an SRA accession
|
||||||
|
*/
|
||||||
|
@BeforeMethod
|
||||||
|
public void skipIfCantResolve(Method method, Object[] params) {
|
||||||
|
String accession = null;
|
||||||
|
|
||||||
|
if (params.length > 0) {
|
||||||
|
Object firstParam = params[0];
|
||||||
|
if (firstParam instanceof PRTest) {
|
||||||
|
accession = ((PRTest)firstParam).accession;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (accession != null &&
|
||||||
|
accession.matches(REMOTE_ACCESSION_PATTERN) && !canResolveNetworkAccession) {
|
||||||
|
throw new SkipException("Skipping network SRA Test because cannot resolve remote SRA accession '" +
|
||||||
|
checkAccession + "'.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "PRTest")
|
||||||
|
public Object[][] createPrintReadsTestData() {
|
||||||
|
return new Object[][]{
|
||||||
|
// Test with local SRA accessions
|
||||||
|
{new PRTest(privateTestDir + "ERR1214757.sra", "", "173ed87acc794a704aa000c6ab5d63a8")},
|
||||||
|
{new PRTest(privateTestDir + "ERR1214757.sra", " -L NC_000001.10:1-50000", "6bc055f028c49bcbca990857e57a6e4b")},
|
||||||
|
{new PRTest(privateTestDir + "ERR1214757.sra", " -L NC_000001.10:500000-1000000", "ab545064b2314971cfae7486ff74d779")},
|
||||||
|
// Tests with remote SRA accessions
|
||||||
|
{new PRTest("ERR1214757", "", "173ed87acc794a704aa000c6ab5d63a8")},
|
||||||
|
{new PRTest("ERR1214757", " -L NC_000001.10:1-50000", "6bc055f028c49bcbca990857e57a6e4b")},
|
||||||
|
{new PRTest("ERR1214757", " -L NC_000001.10:500000-1000000", "ab545064b2314971cfae7486ff74d779")},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(groups = "sra", dataProvider = "PRTest")
|
||||||
|
public void testPrintReads(PRTest params) {
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T PrintReads" +
|
||||||
|
" -R " + params.accession +
|
||||||
|
" -I " + params.accession +
|
||||||
|
params.args +
|
||||||
|
" --no_pg_tag" +
|
||||||
|
" -o %s",
|
||||||
|
Collections.singletonList(params.md5));
|
||||||
|
executeTest("testPrintReads-"+params.args, spec).getFirst();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.gatk.utils.activeregion;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Invariant;
|
import com.google.java.contract.Invariant;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocSortedSet;
|
import org.broadinstitute.gatk.utils.GenomeLocSortedSet;
|
||||||
|
|
@ -168,7 +168,7 @@ public class ActiveRegion implements HasGenomeLocation {
|
||||||
/**
|
/**
|
||||||
* See #getActiveRegionReference but with padding == 0
|
* See #getActiveRegionReference but with padding == 0
|
||||||
*/
|
*/
|
||||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader ) {
|
public byte[] getActiveRegionReference( final ReferenceSequenceFile referenceReader ) {
|
||||||
return getActiveRegionReference(referenceReader, 0);
|
return getActiveRegionReference(referenceReader, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -182,21 +182,21 @@ public class ActiveRegion implements HasGenomeLocation {
|
||||||
* @return a non-null array of bytes holding the reference bases in referenceReader
|
* @return a non-null array of bytes holding the reference bases in referenceReader
|
||||||
*/
|
*/
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
public byte[] getActiveRegionReference( final ReferenceSequenceFile referenceReader, final int padding ) {
|
||||||
return getReference(referenceReader, padding, extendedLoc);
|
return getReference(referenceReader, padding, extendedLoc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* See #getActiveRegionReference but using the span including regions not the extended span
|
* See #getActiveRegionReference but using the span including regions not the extended span
|
||||||
*/
|
*/
|
||||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
|
public byte[] getFullReference( final ReferenceSequenceFile referenceReader ) {
|
||||||
return getFullReference(referenceReader, 0);
|
return getFullReference(referenceReader, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* See #getActiveRegionReference but using the span including regions not the extended span
|
* See #getActiveRegionReference but using the span including regions not the extended span
|
||||||
*/
|
*/
|
||||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
public byte[] getFullReference( final ReferenceSequenceFile referenceReader, final int padding ) {
|
||||||
return getReference(referenceReader, padding, spanIncludingReads);
|
return getReference(referenceReader, padding, spanIncludingReads);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -211,7 +211,7 @@ public class ActiveRegion implements HasGenomeLocation {
|
||||||
* @return a non-null array of bytes holding the reference bases in referenceReader
|
* @return a non-null array of bytes holding the reference bases in referenceReader
|
||||||
*/
|
*/
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
|
public byte[] getReference( final ReferenceSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
|
||||||
if ( referenceReader == null ) throw new IllegalArgumentException("referenceReader cannot be null");
|
if ( referenceReader == null ) throw new IllegalArgumentException("referenceReader cannot be null");
|
||||||
if ( padding < 0 ) throw new IllegalArgumentException("padding must be a positive integer but got " + padding);
|
if ( padding < 0 ) throw new IllegalArgumentException("padding must be a positive integer but got " + padding);
|
||||||
if ( genomeLoc == null ) throw new IllegalArgumentException("genomeLoc cannot be null");
|
if ( genomeLoc == null ) throw new IllegalArgumentException("genomeLoc cannot be null");
|
||||||
|
|
|
||||||
|
|
@ -25,12 +25,12 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.utils.baq;
|
package org.broadinstitute.gatk.utils.baq;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.reference.ReferenceSequence;
|
import htsjdk.samtools.reference.ReferenceSequence;
|
||||||
import htsjdk.samtools.CigarElement;
|
import htsjdk.samtools.CigarElement;
|
||||||
import htsjdk.samtools.CigarOperator;
|
import htsjdk.samtools.CigarOperator;
|
||||||
import htsjdk.samtools.SAMRecord;
|
import htsjdk.samtools.SAMRecord;
|
||||||
import htsjdk.samtools.SAMUtils;
|
import htsjdk.samtools.SAMUtils;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||||
|
|
@ -151,7 +151,7 @@ public class BAQ {
|
||||||
* @param b band width
|
* @param b band width
|
||||||
* @param minBaseQual All bases with Q < minBaseQual are up'd to this value
|
* @param minBaseQual All bases with Q < minBaseQual are up'd to this value
|
||||||
*/
|
*/
|
||||||
public BAQ(final double d, final double e, final int b, final byte minBaseQual, boolean includeClippedBases) {
|
public BAQ(final double d, final double e, final int b, final byte minBaseQual, final boolean includeClippedBases) {
|
||||||
cd = d; ce = e; cb = b;
|
cd = d; ce = e; cb = b;
|
||||||
this.minBaseQual = minBaseQual;
|
this.minBaseQual = minBaseQual;
|
||||||
this.includeClippedBases = includeClippedBases;
|
this.includeClippedBases = includeClippedBases;
|
||||||
|
|
@ -521,7 +521,7 @@ public class BAQ {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public BAQCalculationResult calcBAQFromHMM(SAMRecord read, IndexedFastaSequenceFile refReader) {
|
public BAQCalculationResult calcBAQFromHMM(final SAMRecord read, final ReferenceSequenceFile refReader) {
|
||||||
// start is alignment start - band width / 2 - size of first I element, if there is one. Stop is similar
|
// start is alignment start - band width / 2 - size of first I element, if there is one. Stop is similar
|
||||||
int offset = getBandWidth() / 2;
|
int offset = getBandWidth() / 2;
|
||||||
long readStart = includeClippedBases ? read.getUnclippedStart() : read.getAlignmentStart();
|
long readStart = includeClippedBases ? read.getUnclippedStart() : read.getAlignmentStart();
|
||||||
|
|
@ -540,7 +540,7 @@ public class BAQ {
|
||||||
// final SimpleTimer total = new SimpleTimer();
|
// final SimpleTimer total = new SimpleTimer();
|
||||||
// final SimpleTimer local = new SimpleTimer();
|
// final SimpleTimer local = new SimpleTimer();
|
||||||
// int n = 0;
|
// int n = 0;
|
||||||
public BAQCalculationResult calcBAQFromHMM(byte[] ref, byte[] query, byte[] quals, int queryStart, int queryEnd ) {
|
public BAQCalculationResult calcBAQFromHMM(final byte[] ref, final byte[] query, final byte[] quals, final int queryStart, final int queryEnd ) {
|
||||||
// total.restart();
|
// total.restart();
|
||||||
if ( queryStart < 0 ) throw new ReviewedGATKException("BUG: queryStart < 0: " + queryStart);
|
if ( queryStart < 0 ) throw new ReviewedGATKException("BUG: queryStart < 0: " + queryStart);
|
||||||
if ( queryEnd < 0 ) throw new ReviewedGATKException("BUG: queryEnd < 0: " + queryEnd);
|
if ( queryEnd < 0 ) throw new ReviewedGATKException("BUG: queryEnd < 0: " + queryEnd);
|
||||||
|
|
@ -564,7 +564,7 @@ public class BAQ {
|
||||||
* @param read
|
* @param read
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
private final Pair<Integer,Integer> calculateQueryRange(SAMRecord read) {
|
private final Pair<Integer,Integer> calculateQueryRange(final SAMRecord read) {
|
||||||
int queryStart = -1, queryStop = -1;
|
int queryStart = -1, queryStop = -1;
|
||||||
int readI = 0;
|
int readI = 0;
|
||||||
|
|
||||||
|
|
@ -599,7 +599,7 @@ public class BAQ {
|
||||||
}
|
}
|
||||||
|
|
||||||
// we need to pad ref by at least the bandwidth / 2 on either side
|
// we need to pad ref by at least the bandwidth / 2 on either side
|
||||||
public BAQCalculationResult calcBAQFromHMM(SAMRecord read, byte[] ref, int refOffset) {
|
public BAQCalculationResult calcBAQFromHMM(final SAMRecord read, final byte[] ref, final int refOffset) {
|
||||||
// todo -- need to handle the case where the cigar sum of lengths doesn't cover the whole read
|
// todo -- need to handle the case where the cigar sum of lengths doesn't cover the whole read
|
||||||
Pair<Integer, Integer> queryRange = calculateQueryRange(read);
|
Pair<Integer, Integer> queryRange = calculateQueryRange(read);
|
||||||
if ( queryRange == null ) return null; // read has Ns, or is completely clipped away
|
if ( queryRange == null ) return null; // read has Ns, or is completely clipped away
|
||||||
|
|
@ -642,7 +642,7 @@ public class BAQ {
|
||||||
return baqResult;
|
return baqResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte capBaseByBAQ( byte oq, byte bq, int state, int expectedPos ) {
|
public byte capBaseByBAQ( final byte oq, final byte bq, final int state, final int expectedPos ) {
|
||||||
byte b;
|
byte b;
|
||||||
boolean isIndel = stateIsIndel(state);
|
boolean isIndel = stateIsIndel(state);
|
||||||
int pos = stateAlignedPosition(state);
|
int pos = stateAlignedPosition(state);
|
||||||
|
|
@ -664,7 +664,7 @@ public class BAQ {
|
||||||
* @param calculationType
|
* @param calculationType
|
||||||
* @return BQ qualities for use, in case qmode is DONT_MODIFY
|
* @return BQ qualities for use, in case qmode is DONT_MODIFY
|
||||||
*/
|
*/
|
||||||
public byte[] baqRead(SAMRecord read, IndexedFastaSequenceFile refReader, CalculationMode calculationType, QualityMode qmode ) {
|
public byte[] baqRead(final SAMRecord read, final ReferenceSequenceFile refReader, final CalculationMode calculationType, final QualityMode qmode ) {
|
||||||
if ( DEBUG ) System.out.printf("BAQ %s read %s%n", calculationType, read.getReadName());
|
if ( DEBUG ) System.out.printf("BAQ %s read %s%n", calculationType, read.getReadName());
|
||||||
|
|
||||||
byte[] BAQQuals = read.getBaseQualities(); // in general we are overwriting quals, so just get a pointer to them
|
byte[] BAQQuals = read.getBaseQualities(); // in general we are overwriting quals, so just get a pointer to them
|
||||||
|
|
@ -706,7 +706,7 @@ public class BAQ {
|
||||||
* @param read
|
* @param read
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public boolean excludeReadFromBAQ(SAMRecord read) {
|
public boolean excludeReadFromBAQ(final SAMRecord read) {
|
||||||
// keeping mapped reads, regardless of pairing status, or primary alignment status.
|
// keeping mapped reads, regardless of pairing status, or primary alignment status.
|
||||||
return read.getReadUnmappedFlag() || read.getReadFailsVendorQualityCheckFlag() || read.getDuplicateReadFlag();
|
return read.getReadUnmappedFlag() || read.getReadFailsVendorQualityCheckFlag() || read.getDuplicateReadFlag();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.utils.fasta;
|
package org.broadinstitute.gatk.utils.fasta;
|
||||||
|
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
|
import htsjdk.samtools.sra.SRAAccession;
|
||||||
|
import htsjdk.samtools.sra.SRAIndexedSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import htsjdk.samtools.SAMException;
|
import htsjdk.samtools.SAMException;
|
||||||
import htsjdk.samtools.reference.FastaSequenceIndex;
|
import htsjdk.samtools.reference.FastaSequenceIndex;
|
||||||
|
|
@ -192,8 +195,17 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
||||||
* Possibly may be better as an overloaded constructor.
|
* Possibly may be better as an overloaded constructor.
|
||||||
* @param fastaFile Fasta file to be used as reference
|
* @param fastaFile Fasta file to be used as reference
|
||||||
* @return A new instance of a CachingIndexedFastaSequenceFile.
|
* @return A new instance of a CachingIndexedFastaSequenceFile.
|
||||||
|
* @throws IllegalArgumentException if Fasta file is null
|
||||||
*/
|
*/
|
||||||
public static CachingIndexedFastaSequenceFile checkAndCreate(final File fastaFile) {
|
public static ReferenceSequenceFile checkAndCreate(final File fastaFile) {
|
||||||
|
if ( fastaFile == null ) {
|
||||||
|
throw new IllegalArgumentException("Fasta file is null");
|
||||||
|
}
|
||||||
|
// maybe it is SRA file?
|
||||||
|
if (SRAAccession.isValid(fastaFile.getPath())) {
|
||||||
|
return new SRAIndexedSequenceFile(new SRAAccession(fastaFile.getPath()));
|
||||||
|
}
|
||||||
|
|
||||||
// does the fasta file exist? check that first...
|
// does the fasta file exist? check that first...
|
||||||
if (!fastaFile.exists())
|
if (!fastaFile.exists())
|
||||||
throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist.");
|
throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist.");
|
||||||
|
|
|
||||||
|
|
@ -25,10 +25,10 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.utils.locusiterator;
|
package org.broadinstitute.gatk.utils.locusiterator;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.SAMFileReader;
|
import htsjdk.samtools.SAMFileReader;
|
||||||
import htsjdk.samtools.SAMReadGroupRecord;
|
import htsjdk.samtools.SAMReadGroupRecord;
|
||||||
import htsjdk.samtools.SAMRecordIterator;
|
import htsjdk.samtools.SAMRecordIterator;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Argument;
|
import org.broadinstitute.gatk.utils.commandline.Argument;
|
||||||
import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
|
import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
|
||||||
|
|
@ -62,7 +62,7 @@ public class LIBSPerformance extends CommandLineProgram {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int execute() throws IOException {
|
public int execute() throws IOException {
|
||||||
final IndexedFastaSequenceFile reference = new CachingIndexedFastaSequenceFile(referenceFile);
|
final ReferenceSequenceFile reference = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(reference);
|
final GenomeLocParser genomeLocParser = new GenomeLocParser(reference);
|
||||||
|
|
||||||
final SAMFileReader reader = new SAMFileReader(samFile);
|
final SAMFileReader reader = new SAMFileReader(samFile);
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.utils.sam;
|
package org.broadinstitute.gatk.utils.sam;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.*;
|
import htsjdk.samtools.*;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.gatk.utils.NGSPlatform;
|
import org.broadinstitute.gatk.utils.NGSPlatform;
|
||||||
|
|
||||||
|
|
@ -52,7 +52,7 @@ import java.util.*;
|
||||||
public class ArtificialBAMBuilder {
|
public class ArtificialBAMBuilder {
|
||||||
public final static int BAM_SHARD_SIZE = 16384;
|
public final static int BAM_SHARD_SIZE = 16384;
|
||||||
|
|
||||||
private final IndexedFastaSequenceFile reference;
|
private final ReferenceSequenceFile reference;
|
||||||
private final GenomeLocParser parser;
|
private final GenomeLocParser parser;
|
||||||
|
|
||||||
final int nReadsPerLocus;
|
final int nReadsPerLocus;
|
||||||
|
|
@ -73,7 +73,7 @@ public class ArtificialBAMBuilder {
|
||||||
|
|
||||||
SAMFileHeader header;
|
SAMFileHeader header;
|
||||||
|
|
||||||
public ArtificialBAMBuilder(final IndexedFastaSequenceFile reference, int nReadsPerLocus, int nLoci) {
|
public ArtificialBAMBuilder(final ReferenceSequenceFile reference, int nReadsPerLocus, int nLoci) {
|
||||||
this.nReadsPerLocus = nReadsPerLocus;
|
this.nReadsPerLocus = nReadsPerLocus;
|
||||||
this.nLoci = nLoci;
|
this.nLoci = nLoci;
|
||||||
|
|
||||||
|
|
@ -94,7 +94,7 @@ public class ArtificialBAMBuilder {
|
||||||
createAndSetHeader(1);
|
createAndSetHeader(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public IndexedFastaSequenceFile getReference() {
|
public ReferenceSequenceFile getReference() {
|
||||||
return reference;
|
return reference;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.utils.text;
|
package org.broadinstitute.gatk.utils.text;
|
||||||
|
|
||||||
|
import htsjdk.samtools.sra.SRAAccession;
|
||||||
import org.broadinstitute.gatk.utils.commandline.ParsingEngine;
|
import org.broadinstitute.gatk.utils.commandline.ParsingEngine;
|
||||||
import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||||
|
|
@ -59,6 +60,13 @@ public class ListFileUtils {
|
||||||
* @return a flattened list of the bam files provided
|
* @return a flattened list of the bam files provided
|
||||||
*/
|
*/
|
||||||
public static List<SAMReaderID> unpackBAMFileList(final List<String> samFiles, final ParsingEngine parser) {
|
public static List<SAMReaderID> unpackBAMFileList(final List<String> samFiles, final ParsingEngine parser) {
|
||||||
|
if ( samFiles == null ) {
|
||||||
|
throw new IllegalArgumentException("The SAM files are null");
|
||||||
|
}
|
||||||
|
if ( parser == null ) {
|
||||||
|
throw new IllegalArgumentException("The parser is null");
|
||||||
|
}
|
||||||
|
|
||||||
List<SAMReaderID> unpackedReads = new ArrayList<SAMReaderID>();
|
List<SAMReaderID> unpackedReads = new ArrayList<SAMReaderID>();
|
||||||
for( String inputFileName: samFiles ) {
|
for( String inputFileName: samFiles ) {
|
||||||
Tags inputFileNameTags = parser.getTags(inputFileName);
|
Tags inputFileNameTags = parser.getTags(inputFileName);
|
||||||
|
|
@ -79,10 +87,14 @@ public class ListFileUtils {
|
||||||
else if(inputFileName.endsWith("stdin")) {
|
else if(inputFileName.endsWith("stdin")) {
|
||||||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||||
}
|
}
|
||||||
|
else if(SRAAccession.isValid(inputFileName)) {
|
||||||
|
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM/CRAM files with the .bam/.cram extension and lists of BAM/CRAM files " +
|
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM/CRAM files with the .bam/.cram extension " + "" +
|
||||||
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM/CRAM file or list " +
|
"or SRA archives and lists of BAM/CRAM/SRA files with the .list extension, but the file %s has " +
|
||||||
"of BAM/CRAM files is in the correct format, update the extension, and try again.",inputFileName));
|
"neither extension and is not SRA accession. Please ensure that your BAM/CRAM file or list " +
|
||||||
|
"of BAM/CRAM files is in the correct format, update the extension, and try again.", inputFileName));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return unpackedReads;
|
return unpackedReads;
|
||||||
|
|
|
||||||
|
|
@ -29,10 +29,10 @@ package org.broadinstitute.gatk.utils;
|
||||||
// the imports for unit testing.
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.SAMFileHeader;
|
import htsjdk.samtools.SAMFileHeader;
|
||||||
import htsjdk.samtools.SAMFileReader;
|
import htsjdk.samtools.SAMFileReader;
|
||||||
import htsjdk.samtools.SAMRecord;
|
import htsjdk.samtools.SAMRecord;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||||
|
|
@ -57,7 +57,7 @@ public class ExampleToCopyUnitTest extends BaseTest {
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
// example fasta index file, can be deleted if you don't use the reference
|
// example fasta index file, can be deleted if you don't use the reference
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public void setup() throws FileNotFoundException {
|
public void setup() throws FileNotFoundException {
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.gatk.utils;
|
||||||
|
|
||||||
import com.google.caliper.Param;
|
import com.google.caliper.Param;
|
||||||
import com.google.caliper.SimpleBenchmark;
|
import com.google.caliper.SimpleBenchmark;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -36,7 +36,7 @@ import java.io.File;
|
||||||
* Caliper microbenchmark of genome loc parser
|
* Caliper microbenchmark of genome loc parser
|
||||||
*/
|
*/
|
||||||
public class GenomeLocParserBenchmark extends SimpleBenchmark {
|
public class GenomeLocParserBenchmark extends SimpleBenchmark {
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
private final int ITERATIONS = 1000000;
|
private final int ITERATIONS = 1000000;
|
||||||
|
|
||||||
@Param({"NEW", "NONE"})
|
@Param({"NEW", "NONE"})
|
||||||
|
|
|
||||||
|
|
@ -28,10 +28,9 @@ package org.broadinstitute.gatk.utils;
|
||||||
|
|
||||||
// the imports for unit testing.
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.SAMFileHeader;
|
import htsjdk.samtools.SAMFileHeader;
|
||||||
import htsjdk.samtools.SAMReadGroupRecord;
|
import htsjdk.samtools.SAMReadGroupRecord;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
||||||
|
|
@ -53,7 +52,7 @@ public class NGSPlatformUnitTest extends BaseTest {
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
// example fasta index file, can be deleted if you don't use the reference
|
// example fasta index file, can be deleted if you don't use the reference
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public void setup() throws FileNotFoundException {
|
public void setup() throws FileNotFoundException {
|
||||||
|
|
|
||||||
|
|
@ -29,8 +29,8 @@ package org.broadinstitute.gatk.utils.activeregion;
|
||||||
// the imports for unit testing.
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.SAMFileHeader;
|
import htsjdk.samtools.SAMFileHeader;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
|
|
@ -51,7 +51,7 @@ import java.util.*;
|
||||||
public class ActiveRegionUnitTest extends BaseTest {
|
public class ActiveRegionUnitTest extends BaseTest {
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
private String contig;
|
private String contig;
|
||||||
private int contigLength;
|
private int contigLength;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.gatk.utils.baq;
|
package org.broadinstitute.gatk.utils.baq;
|
||||||
|
|
||||||
|
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
@ -52,7 +53,7 @@ public class BAQUnitTest extends BaseTest {
|
||||||
private final int startChr = 1;
|
private final int startChr = 1;
|
||||||
private final int numChr = 2;
|
private final int numChr = 2;
|
||||||
private final int chrSize = 1000;
|
private final int chrSize = 1000;
|
||||||
IndexedFastaSequenceFile fasta = null;
|
ReferenceSequenceFile fasta = null;
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void before() {
|
public void before() {
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ package org.broadinstitute.gatk.utils.fasta;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||||
import htsjdk.samtools.reference.ReferenceSequence;
|
import htsjdk.samtools.reference.ReferenceSequence;
|
||||||
import htsjdk.samtools.SAMSequenceRecord;
|
import htsjdk.samtools.SAMSequenceRecord;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.log4j.Priority;
|
import org.apache.log4j.Priority;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
|
|
@ -93,7 +94,7 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
||||||
private void testSequential(final CachingIndexedFastaSequenceFile caching, final File fasta, final int querySize) throws FileNotFoundException {
|
private void testSequential(final CachingIndexedFastaSequenceFile caching, final File fasta, final int querySize) throws FileNotFoundException {
|
||||||
Assert.assertTrue(caching.isPreservingCase(), "testSequential only works for case preserving CachingIndexedFastaSequenceFile readers");
|
Assert.assertTrue(caching.isPreservingCase(), "testSequential only works for case preserving CachingIndexedFastaSequenceFile readers");
|
||||||
|
|
||||||
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
final ReferenceSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
||||||
|
|
||||||
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
|
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
|
||||||
for ( int i = 0; i < contig.getSequenceLength(); i += STEP_SIZE ) {
|
for ( int i = 0; i < contig.getSequenceLength(); i += STEP_SIZE ) {
|
||||||
|
|
@ -123,7 +124,7 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
||||||
// Tests grabbing sequences around a middle cached value.
|
// Tests grabbing sequences around a middle cached value.
|
||||||
@Test(dataProvider = "fastas", enabled = true && ! DEBUG)
|
@Test(dataProvider = "fastas", enabled = true && ! DEBUG)
|
||||||
public void testCachingIndexedFastaReaderTwoStage(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
|
public void testCachingIndexedFastaReaderTwoStage(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
|
||||||
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
final ReferenceSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
||||||
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false);
|
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false);
|
||||||
|
|
||||||
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
|
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
|
||||||
|
|
@ -191,7 +192,7 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
||||||
// make sure some bases are lower case and some are upper case
|
// make sure some bases are lower case and some are upper case
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testMixedCasesInExample() throws FileNotFoundException, InterruptedException {
|
public void testMixedCasesInExample() throws FileNotFoundException, InterruptedException {
|
||||||
final IndexedFastaSequenceFile original = new IndexedFastaSequenceFile(new File(exampleFASTA));
|
final ReferenceSequenceFile original = new IndexedFastaSequenceFile(new File(exampleFASTA));
|
||||||
final CachingIndexedFastaSequenceFile casePreserving = new CachingIndexedFastaSequenceFile(new File(exampleFASTA), true);
|
final CachingIndexedFastaSequenceFile casePreserving = new CachingIndexedFastaSequenceFile(new File(exampleFASTA), true);
|
||||||
final CachingIndexedFastaSequenceFile allUpper = new CachingIndexedFastaSequenceFile(new File(exampleFASTA));
|
final CachingIndexedFastaSequenceFile allUpper = new CachingIndexedFastaSequenceFile(new File(exampleFASTA));
|
||||||
|
|
||||||
|
|
@ -208,9 +209,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(nMixedCase > 0, "No mixed cases sequences found in file. Unexpected test state");
|
Assert.assertTrue(nMixedCase > 0, "No mixed cases sequences found in file. Unexpected test state");
|
||||||
}
|
}
|
||||||
|
|
||||||
private int testCases(final IndexedFastaSequenceFile original,
|
private int testCases(final ReferenceSequenceFile original,
|
||||||
final IndexedFastaSequenceFile casePreserving,
|
final ReferenceSequenceFile casePreserving,
|
||||||
final IndexedFastaSequenceFile allUpper,
|
final ReferenceSequenceFile allUpper,
|
||||||
final String contig, final int start, final int stop ) {
|
final String contig, final int start, final int stop ) {
|
||||||
final String orig = fetchBaseString(original, contig, start, stop);
|
final String orig = fetchBaseString(original, contig, start, stop);
|
||||||
final String keptCase = fetchBaseString(casePreserving, contig, start, stop);
|
final String keptCase = fetchBaseString(casePreserving, contig, start, stop);
|
||||||
|
|
@ -226,7 +227,7 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String fetchBaseString(final IndexedFastaSequenceFile reader, final String contig, final int start, final int stop) {
|
private String fetchBaseString(final ReferenceSequenceFile reader, final String contig, final int start, final int stop) {
|
||||||
if ( start == -1 )
|
if ( start == -1 )
|
||||||
return new String(reader.getSequence(contig).getBases());
|
return new String(reader.getSequence(contig).getBases());
|
||||||
else
|
else
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.utils.refdata.tracks;
|
package org.broadinstitute.gatk.utils.refdata.tracks;
|
||||||
|
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.tribble.Feature;
|
import htsjdk.tribble.Feature;
|
||||||
import htsjdk.tribble.FeatureCodec;
|
import htsjdk.tribble.FeatureCodec;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
|
|
@ -66,7 +65,7 @@ public class FeatureManagerUnitTest extends BaseTest {
|
||||||
public void setup() {
|
public void setup() {
|
||||||
File referenceFile = new File(b36KGReference);
|
File referenceFile = new File(b36KGReference);
|
||||||
try {
|
try {
|
||||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||||
genomeLocParser = new GenomeLocParser(seq);
|
genomeLocParser = new GenomeLocParser(seq);
|
||||||
manager = new FeatureManager();
|
manager = new FeatureManager();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
package org.broadinstitute.gatk.utils.refdata.tracks;
|
package org.broadinstitute.gatk.utils.refdata.tracks;
|
||||||
|
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.tribble.Tribble;
|
import htsjdk.tribble.Tribble;
|
||||||
import htsjdk.tribble.index.Index;
|
import htsjdk.tribble.index.Index;
|
||||||
import htsjdk.tribble.util.LittleEndianOutputStream;
|
import htsjdk.tribble.util.LittleEndianOutputStream;
|
||||||
|
|
@ -54,7 +54,7 @@ import java.nio.channels.FileChannel;
|
||||||
*/
|
*/
|
||||||
public class RMDTrackBuilderUnitTest extends BaseTest {
|
public class RMDTrackBuilderUnitTest extends BaseTest {
|
||||||
private RMDTrackBuilder builder;
|
private RMDTrackBuilder builder;
|
||||||
private IndexedFastaSequenceFile seq;
|
private ReferenceSequenceFile seq;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.utils.refdata.utils;
|
package org.broadinstitute.gatk.utils.refdata.utils;
|
||||||
|
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import htsjdk.tribble.Feature;
|
import htsjdk.tribble.Feature;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
|
|
@ -43,7 +43,7 @@ public class FeatureToGATKFeatureIteratorUnitTest extends BaseTest {
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public void testCloseFilePointers() throws IOException {
|
public void testCloseFilePointers() throws IOException {
|
||||||
final String chr = "20";
|
final String chr = "20";
|
||||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||||
GenomeLocParser parser = new GenomeLocParser(seq);
|
GenomeLocParser parser = new GenomeLocParser(seq);
|
||||||
File file = new File(privateTestDir + "NA12878.hg19.example1.vcf");
|
File file = new File(privateTestDir + "NA12878.hg19.example1.vcf");
|
||||||
VCFCodec codec = new VCFCodec();
|
VCFCodec codec = new VCFCodec();
|
||||||
|
|
|
||||||
|
|
@ -26,8 +26,8 @@
|
||||||
package org.broadinstitute.gatk.utils.sam;
|
package org.broadinstitute.gatk.utils.sam;
|
||||||
|
|
||||||
import htsjdk.samtools.SAMReadGroupRecord;
|
import htsjdk.samtools.SAMReadGroupRecord;
|
||||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|
||||||
import htsjdk.samtools.SAMFileHeader;
|
import htsjdk.samtools.SAMFileHeader;
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.gatk.utils.BaseTest;
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||||
import org.broadinstitute.gatk.utils.Utils;
|
import org.broadinstitute.gatk.utils.Utils;
|
||||||
|
|
@ -223,7 +223,7 @@ public class ReadUtilsUnitTest extends BaseTest {
|
||||||
@Test (enabled = true)
|
@Test (enabled = true)
|
||||||
public void testReadWithNsRefIndexInDeletion() throws FileNotFoundException {
|
public void testReadWithNsRefIndexInDeletion() throws FileNotFoundException {
|
||||||
|
|
||||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
||||||
final int readLength = 76;
|
final int readLength = 76;
|
||||||
|
|
||||||
|
|
@ -239,7 +239,7 @@ public class ReadUtilsUnitTest extends BaseTest {
|
||||||
@Test (enabled = true)
|
@Test (enabled = true)
|
||||||
public void testReadWithNsRefAfterDeletion() throws FileNotFoundException {
|
public void testReadWithNsRefAfterDeletion() throws FileNotFoundException {
|
||||||
|
|
||||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
||||||
final int readLength = 76;
|
final int readLength = 76;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue