Added support for directly reading SRA runs
This commit is contained in:
parent
6bca5a060f
commit
a465c87ff8
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.bqsr;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.CigarElement;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.tribble.Feature;
|
||||
import org.broadinstitute.gatk.engine.recalibration.*;
|
||||
import org.broadinstitute.gatk.engine.walkers.*;
|
||||
|
|
@ -194,7 +194,7 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to mask out known variant sites. Please provide a VCF file containing known sites of genetic variation.";
|
||||
|
||||
private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector
|
||||
private IndexedFastaSequenceFile referenceReader; // fasta reference reader for use with BAQ calculation
|
||||
private ReferenceSequenceFile referenceReader; // fasta reference reader for use with BAQ calculation
|
||||
private final static byte NO_BAQ_UNCERTAINTY = (byte)'@';
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import htsjdk.samtools.SAMFileWriter;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
|
|
@ -487,7 +488,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
private HaplotypeCallerGenotypingEngine genotypingEngine = null;
|
||||
|
||||
// fasta reference reader to supplement the edges of the reference sequence
|
||||
protected CachingIndexedFastaSequenceFile referenceReader;
|
||||
protected ReferenceSequenceFile referenceReader;
|
||||
|
||||
// reference base padding size
|
||||
private static final int REFERENCE_PADDING = 500;
|
||||
|
|
@ -683,12 +684,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
|
||||
vcfWriter.writeHeader(new VCFHeader(headerInfo, sampleSet));
|
||||
|
||||
try {
|
||||
// fasta reference reader to supplement the edges of the reference sequence
|
||||
referenceReader = new CachingIndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
|
||||
} catch( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
|
||||
}
|
||||
// fasta reference reader to supplement the edges of the reference sequence
|
||||
referenceReader = CachingIndexedFastaSequenceFile.checkAndCreate(getToolkit().getArguments().referenceFile);
|
||||
|
||||
// create and setup the assembler
|
||||
assemblyEngine = new ReadThreadingAssembler(RTAC.maxNumHaplotypesInPopulation, RTAC.kmerSizes, RTAC.dontIncreaseKmerSizesForCycles, RTAC.allowNonUniqueKmersInRef, RTAC.numPruningSamples);
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
|
|
@ -71,7 +71,7 @@ import java.util.*;
|
|||
|
||||
public class VariantOverlapAnnotatorUnitTest extends BaseTest {
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public void setup() throws FileNotFoundException {
|
||||
|
|
|
|||
|
|
@ -76,5 +76,4 @@ public class BaseQualitySumPerAlleleBySampleUnitTest {
|
|||
Assert.assertFalse(a.isUsableRead(read));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.tribble.readers.LineIterator;
|
||||
import htsjdk.tribble.readers.PositionalBufferedStream;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
|
@ -207,7 +208,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
|
||||
try {
|
||||
final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final ReferenceSequenceFile fasta = new IndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final GenomeLocParser parser = new GenomeLocParser(fasta.getSequenceDictionary());
|
||||
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -pairHMMSub %s %s -R %s -I %s", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, REF, bam) + " -L 20:10,001,603-10,001,642 -L 20:10,001,653-10,001,742 --no_cmdline_in_header -o %s";
|
||||
|
|
|
|||
|
|
@ -51,11 +51,11 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.Cigar;
|
||||
import htsjdk.samtools.CigarElement;
|
||||
import htsjdk.samtools.CigarOperator;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
|
|
@ -80,7 +80,7 @@ import java.util.*;
|
|||
|
||||
public class LocalAssemblyEngineUnitTest extends BaseTest {
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
private SAMFileHeader header;
|
||||
|
||||
@BeforeClass
|
||||
|
|
|
|||
|
|
@ -51,9 +51,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.indels;
|
||||
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.UnvalidatingGenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
|
@ -76,7 +75,7 @@ public class PairHMMIndelErrorModelUnitTest extends BaseTest {
|
|||
|
||||
@BeforeClass
|
||||
public void setup() throws FileNotFoundException {
|
||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.SAMSequenceDictionary;
|
||||
import htsjdk.samtools.SAMSequenceRecord;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -71,7 +71,7 @@ public class ContigComparatorUnitTest extends BaseTest {
|
|||
@BeforeClass
|
||||
public void setup() throws FileNotFoundException {
|
||||
// sequence
|
||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(seq);
|
||||
dictForFails = genomeLocParser.getContigs();
|
||||
}
|
||||
|
|
@ -81,7 +81,7 @@ public class ContigComparatorUnitTest extends BaseTest {
|
|||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final String ref : Arrays.asList(b37KGReference, hg18Reference) ) {
|
||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(ref));
|
||||
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(ref));
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(seq);
|
||||
final SAMSequenceDictionary dict = genomeLocParser.getContigs();
|
||||
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@
|
|||
package org.broadinstitute.gatk.utils.genotyper;
|
||||
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
|
|
@ -64,7 +65,6 @@ import java.util.Map;
|
|||
import java.util.List;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
|
@ -81,7 +81,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest {
|
|||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
// example fasta index file, can be deleted if you don't use the reference
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public void setup() throws FileNotFoundException {
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.gatk.engine.io.stubs.VCFWriterArgumentTypeDescriptor;
|
|||
import org.broadinstitute.gatk.utils.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.gatk.engine.walkers.Walker;
|
||||
import org.broadinstitute.gatk.utils.text.ListFileUtils;
|
||||
import htsjdk.samtools.sra.SRAAccession;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
|
@ -63,10 +64,19 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
/**
|
||||
* A list of all the arguments initially used as sources.
|
||||
*/
|
||||
private final Collection<Object> argumentSources = new ArrayList<Object>();
|
||||
private final Collection<Object> argumentSources = new ArrayList<>();
|
||||
|
||||
protected static Logger logger = Logger.getLogger(CommandLineExecutable.class);
|
||||
|
||||
private final static String SRA_LIBS_DOWNLOAD = "samjdk.sra_libraries_download";
|
||||
/**
|
||||
* Set GATK version to be used as part of user agent for network requests
|
||||
*/
|
||||
static {
|
||||
System.setProperty(SRA_LIBS_DOWNLOAD, "true");
|
||||
SRAAccession.setAppVersionString("GATK " + getVersionNumber());
|
||||
}
|
||||
|
||||
/**
|
||||
* this is the function that the inheriting class can expect to have called
|
||||
* when the command line system has initialized.
|
||||
|
|
|
|||
|
|
@ -905,7 +905,7 @@ public class GenomeAnalysisEngine {
|
|||
* @return A data source for the given set of reads.
|
||||
*/
|
||||
private SAMDataSource createReadsDataSource(final GATKArgumentCollection argCollection, final GenomeLocParser genomeLocParser,
|
||||
final IndexedFastaSequenceFile refReader, final Map<String, String> sampleRenameMap) {
|
||||
final ReferenceSequenceFile refReader, final Map<String, String> sampleRenameMap) {
|
||||
DownsamplingMethod downsamplingMethod = getDownsamplingMethod();
|
||||
|
||||
// Synchronize the method back into the collection so that it shows up when
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.gatk.engine.alignment.bwa.java;
|
|||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.engine.alignment.Aligner;
|
||||
import org.broadinstitute.gatk.engine.alignment.Alignment;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
|
|
@ -126,7 +127,7 @@ public class AlignerTestHarness {
|
|||
|
||||
mismatches++;
|
||||
|
||||
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
||||
final ReferenceSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
||||
|
||||
System.out.printf("read = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(read.getReadString(),read.getCigar(),CigarOperator.DELETION),
|
||||
read.getAlignmentStart(),
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.engine.ReadProperties;
|
||||
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
||||
import org.broadinstitute.gatk.engine.datasources.rmd.ReferenceOrderedDataSource;
|
||||
|
|
@ -62,7 +62,7 @@ public class LocusShardDataProvider extends ShardDataProvider {
|
|||
* @param shard The chunk of data over which traversals happen.
|
||||
* @param reference A getter for a section of the reference.
|
||||
*/
|
||||
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||
public LocusShardDataProvider(final Shard shard, final ReadProperties sourceInfo, final GenomeLocParser genomeLocParser, final GenomeLoc locus, final LocusIterator locusIterator, final ReferenceSequenceFile reference, final Collection<ReferenceOrderedDataSource> rods) {
|
||||
super(shard,genomeLocParser,reference,rods);
|
||||
this.sourceInfo = sourceInfo;
|
||||
this.locus = locus;
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
||||
import org.broadinstitute.gatk.engine.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
||||
|
|
@ -50,7 +51,7 @@ public class ReadShardDataProvider extends ShardDataProvider {
|
|||
* @param shard The chunk of data over which traversals happen.
|
||||
* @param reference A getter for a section of the reference.
|
||||
*/
|
||||
public ReadShardDataProvider(Shard shard, GenomeLocParser genomeLocParser, GATKSAMIterator reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||
public ReadShardDataProvider(final Shard shard, final GenomeLocParser genomeLocParser, final GATKSAMIterator reads, final ReferenceSequenceFile reference, final Collection<ReferenceOrderedDataSource> rods) {
|
||||
super(shard,genomeLocParser,reference,rods);
|
||||
this.reads = reads;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|||
import htsjdk.samtools.reference.ReferenceSequence;
|
||||
import htsjdk.samtools.SAMRecord;
|
||||
import htsjdk.samtools.SAMSequenceRecord;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
|
|
@ -61,13 +62,13 @@ public class ReferenceView implements View {
|
|||
/**
|
||||
* The source of reference data.
|
||||
*/
|
||||
protected IndexedFastaSequenceFile reference = null;
|
||||
protected ReferenceSequenceFile reference = null;
|
||||
|
||||
/**
|
||||
* Create a new ReferenceView.
|
||||
* @param provider
|
||||
*/
|
||||
public ReferenceView( ShardDataProvider provider ) {
|
||||
public ReferenceView( final ShardDataProvider provider ) {
|
||||
this.genomeLocParser = provider.getGenomeLocParser();
|
||||
this.reference = provider.getReference();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
||||
import org.broadinstitute.gatk.engine.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
|
|
@ -70,7 +70,7 @@ public abstract class ShardDataProvider {
|
|||
/**
|
||||
* Provider of reference data for this particular shard.
|
||||
*/
|
||||
private final IndexedFastaSequenceFile reference;
|
||||
private final ReferenceSequenceFile reference;
|
||||
|
||||
/**
|
||||
* Sources of reference-ordered data.
|
||||
|
|
@ -106,7 +106,7 @@ public abstract class ShardDataProvider {
|
|||
* Gets a pointer into the given indexed fasta sequence file.
|
||||
* @return The indexed fasta sequence file.
|
||||
*/
|
||||
IndexedFastaSequenceFile getReference() {
|
||||
ReferenceSequenceFile getReference() {
|
||||
return reference;
|
||||
}
|
||||
|
||||
|
|
@ -131,7 +131,7 @@ public abstract class ShardDataProvider {
|
|||
* @param shard The chunk of data over which traversals happen.
|
||||
* @param reference A getter for a section of the reference.
|
||||
*/
|
||||
public ShardDataProvider(Shard shard,GenomeLocParser genomeLocParser,IndexedFastaSequenceFile reference,Collection<ReferenceOrderedDataSource> rods) {
|
||||
public ShardDataProvider(final Shard shard, final GenomeLocParser genomeLocParser, final ReferenceSequenceFile reference, final Collection<ReferenceOrderedDataSource> rods) {
|
||||
this.shard = shard;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.reference = reference;
|
||||
|
|
|
|||
|
|
@ -25,20 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
|
||||
import htsjdk.samtools.seekablestream.SeekableFileStream;
|
||||
import htsjdk.samtools.seekablestream.SeekableStream;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import htsjdk.samtools.Bin;
|
||||
|
||||
/**
|
||||
* A basic interface for querying BAM indices.
|
||||
|
|
@ -47,116 +34,24 @@ import java.util.List;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class GATKBAMIndex {
|
||||
public abstract class GATKBAMIndex {
|
||||
|
||||
/**
|
||||
* BAM index file magic number.
|
||||
* What is the starting bin for each level?
|
||||
*/
|
||||
private static final byte[] BAM_INDEX_MAGIC = "BAI\1".getBytes();
|
||||
protected static final int[] LEVEL_STARTS = {0,1,9,73,585,4681};
|
||||
|
||||
/**
|
||||
* Reports the total amount of genomic data that any bin can index.
|
||||
*/
|
||||
protected static final int BIN_GENOMIC_SPAN = 512*1024*1024;
|
||||
|
||||
/**
|
||||
* What is the starting bin for each level?
|
||||
*/
|
||||
private static final int[] LEVEL_STARTS = {0,1,9,73,585,4681};
|
||||
|
||||
/**
|
||||
* Reports the maximum number of bins that can appear in a BAM file.
|
||||
*/
|
||||
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
|
||||
|
||||
private final SAMSequenceDictionary sequenceDictionary;
|
||||
private final File mFile;
|
||||
|
||||
//TODO: figure out a good value for this buffer size
|
||||
private static final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
|
||||
|
||||
/**
|
||||
* Number of sequences stored in this index.
|
||||
*/
|
||||
private final int sequenceCount;
|
||||
|
||||
/**
|
||||
* A cache of the starting positions of the sequences.
|
||||
*/
|
||||
private final long[] sequenceStartCache;
|
||||
|
||||
private SeekableFileStream fileStream;
|
||||
private SeekableStream baiStream;
|
||||
private SeekableBufferedStream bufferedStream;
|
||||
private long fileLength;
|
||||
|
||||
public GATKBAMIndex(final File file, final SAMSequenceDictionary sequenceDictionary) {
|
||||
mFile = file;
|
||||
this.sequenceDictionary = sequenceDictionary;
|
||||
|
||||
// Open the file stream.
|
||||
openIndexFile();
|
||||
|
||||
// Verify the magic number.
|
||||
seek(0);
|
||||
final byte[] buffer = readBytes(4);
|
||||
if (!Arrays.equals(buffer, BAM_INDEX_MAGIC)) {
|
||||
throw new ReviewedGATKException("Invalid file header in BAM index " + mFile +
|
||||
": " + new String(buffer));
|
||||
}
|
||||
|
||||
seek(4);
|
||||
|
||||
sequenceCount = readInteger();
|
||||
|
||||
// Create a cache of the starting position of each sequence. Initialize it to -1.
|
||||
sequenceStartCache = new long[sequenceCount];
|
||||
for(int i = 1; i < sequenceCount; i++)
|
||||
sequenceStartCache[i] = -1;
|
||||
|
||||
// Seed the first element in the array with the current position.
|
||||
if(sequenceCount > 0)
|
||||
sequenceStartCache[0] = position();
|
||||
|
||||
closeIndexFile();
|
||||
}
|
||||
|
||||
public GATKBAMIndexData readReferenceSequence(final int referenceSequence) {
|
||||
openIndexFile();
|
||||
|
||||
if (referenceSequence >= sequenceCount)
|
||||
throw new ReviewedGATKException("Invalid sequence number " + referenceSequence + " in index file " + mFile);
|
||||
|
||||
skipToSequence(referenceSequence);
|
||||
|
||||
int binCount = readInteger();
|
||||
List<GATKBin> bins = new ArrayList<>();
|
||||
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||
final int indexBin = readInteger();
|
||||
final int nChunks = readInteger();
|
||||
|
||||
List<GATKChunk> chunks = new ArrayList<>(nChunks);
|
||||
long[] rawChunkData = readLongs(nChunks*2);
|
||||
for (int ci = 0; ci < nChunks; ci++) {
|
||||
final long chunkBegin = rawChunkData[ci*2];
|
||||
final long chunkEnd = rawChunkData[ci*2+1];
|
||||
chunks.add(new GATKChunk(chunkBegin, chunkEnd));
|
||||
}
|
||||
GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
||||
bin.setChunkList(chunks.toArray(new GATKChunk[chunks.size()]));
|
||||
while(indexBin >= bins.size())
|
||||
bins.add(null);
|
||||
bins.set(indexBin,bin);
|
||||
}
|
||||
|
||||
final int nLinearBins = readInteger();
|
||||
long[] linearIndexEntries = readLongs(nLinearBins);
|
||||
|
||||
LinearIndex linearIndex = new LinearIndex(referenceSequence,0,linearIndexEntries);
|
||||
|
||||
closeIndexFile();
|
||||
|
||||
return new GATKBAMIndexData(this,referenceSequence,bins,linearIndex);
|
||||
}
|
||||
public abstract GATKBAMIndexData readReferenceSequence(final int referenceSequence);
|
||||
|
||||
/**
|
||||
* Get the number of levels employed by this index.
|
||||
|
|
@ -180,91 +75,35 @@ public class GATKBAMIndex {
|
|||
* @param levelNumber Level number. 0-based.
|
||||
* @return The size (number of possible bins) of the given level.
|
||||
*/
|
||||
public int getLevelSize(final int levelNumber) {
|
||||
if(levelNumber == getNumIndexLevels()-1)
|
||||
return MAX_BINS-LEVEL_STARTS[levelNumber]-1;
|
||||
else
|
||||
return LEVEL_STARTS[levelNumber+1]-LEVEL_STARTS[levelNumber];
|
||||
}
|
||||
public abstract int getLevelSize(final int levelNumber);
|
||||
|
||||
/**
|
||||
* Gets the level associated with the given bin number.
|
||||
* @param bin The bin for which to determine the level.
|
||||
* @return the level associated with the given bin number.
|
||||
*/
|
||||
public int getLevelForBin(final Bin bin) {
|
||||
GATKBin gatkBin = new GATKBin(bin);
|
||||
if(gatkBin.getBinNumber() >= MAX_BINS)
|
||||
throw new ReviewedGATKException("Tried to get level for invalid bin in index file " + mFile);
|
||||
for(int i = getNumIndexLevels()-1; i >= 0; i--) {
|
||||
if(gatkBin.getBinNumber() >= LEVEL_STARTS[i])
|
||||
return i;
|
||||
}
|
||||
throw new ReviewedGATKException("Unable to find correct bin for bin " + bin + " in index file " + mFile);
|
||||
}
|
||||
public abstract int getLevelForBin(final Bin bin);
|
||||
|
||||
/**
|
||||
* Gets the first locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
public int getFirstLocusInBin(final Bin bin) {
|
||||
final int level = getLevelForBin(bin);
|
||||
final int levelStart = LEVEL_STARTS[level];
|
||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||
return (new GATKBin(bin).getBinNumber() - levelStart)*(BIN_GENOMIC_SPAN /levelSize)+1;
|
||||
}
|
||||
public abstract int getFirstLocusInBin(final Bin bin);
|
||||
|
||||
/**
|
||||
* Gets the last locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
public int getLastLocusInBin(final Bin bin) {
|
||||
final int level = getLevelForBin(bin);
|
||||
final int levelStart = LEVEL_STARTS[level];
|
||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
|
||||
}
|
||||
public abstract int getLastLocusInBin(final Bin bin);
|
||||
|
||||
/**
|
||||
* Use to get close to the unmapped reads at the end of a BAM file.
|
||||
* @return The file offset of the first record in the last linear bin, or -1
|
||||
* if there are no elements in linear bins (i.e. no mapped reads).
|
||||
*/
|
||||
public long getStartOfLastLinearBin() {
|
||||
openIndexFile();
|
||||
|
||||
seek(4);
|
||||
|
||||
final int sequenceCount = readInteger();
|
||||
// Because no reads may align to the last sequence in the sequence dictionary,
|
||||
// grab the last element of the linear index for each sequence, and return
|
||||
// the last one from the last sequence that has one.
|
||||
long lastLinearIndexPointer = -1;
|
||||
for (int i = 0; i < sequenceCount; i++) {
|
||||
// System.out.println("# Sequence TID: " + i);
|
||||
final int nBins = readInteger();
|
||||
// System.out.println("# nBins: " + nBins);
|
||||
for (int j1 = 0; j1 < nBins; j1++) {
|
||||
// Skip bin #
|
||||
skipBytes(4);
|
||||
final int nChunks = readInteger();
|
||||
// Skip chunks
|
||||
skipBytes(16 * nChunks);
|
||||
}
|
||||
final int nLinearBins = readInteger();
|
||||
if (nLinearBins > 0) {
|
||||
// Skip to last element of list of linear bins
|
||||
skipBytes(8 * (nLinearBins - 1));
|
||||
lastLinearIndexPointer = readLongs(1)[0];
|
||||
}
|
||||
}
|
||||
|
||||
closeIndexFile();
|
||||
|
||||
return lastLinearIndexPointer;
|
||||
}
|
||||
public abstract long getStartOfLastLinearBin();
|
||||
|
||||
/**
|
||||
* Gets the possible number of bins for a given reference sequence.
|
||||
|
|
@ -273,197 +112,4 @@ public class GATKBAMIndex {
|
|||
protected int getMaxAddressibleGenomicLocation() {
|
||||
return BIN_GENOMIC_SPAN;
|
||||
}
|
||||
|
||||
protected void skipToSequence(final int referenceSequence) {
|
||||
// Find the offset in the file of the last sequence whose position has been determined. Start here
|
||||
// when searching the sequence for the next value to read. (Note that sequenceStartCache[0] will always
|
||||
// be present, so no extra stopping condition is necessary.
|
||||
int sequenceIndex = referenceSequence;
|
||||
while(sequenceStartCache[sequenceIndex] == -1)
|
||||
sequenceIndex--;
|
||||
|
||||
// Advance to the most recently found position.
|
||||
seek(sequenceStartCache[sequenceIndex]);
|
||||
|
||||
for (int i = sequenceIndex; i < referenceSequence; i++) {
|
||||
sequenceStartCache[i] = position();
|
||||
// System.out.println("# Sequence TID: " + i);
|
||||
final int nBins = readInteger();
|
||||
// System.out.println("# nBins: " + nBins);
|
||||
for (int j = 0; j < nBins; j++) {
|
||||
/* final int bin = */
|
||||
readInteger();
|
||||
final int nChunks = readInteger();
|
||||
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
||||
skipBytes(16 * nChunks);
|
||||
}
|
||||
final int nLinearBins = readInteger();
|
||||
// System.out.println("# nLinearBins: " + nLinearBins);
|
||||
skipBytes(8 * nLinearBins);
|
||||
|
||||
}
|
||||
|
||||
sequenceStartCache[referenceSequence] = position();
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void openIndexFile() {
|
||||
try {
|
||||
fileStream = new SeekableFileStream(mFile);
|
||||
baiStream = SamIndexes.asBaiSeekableStreamOrNull(fileStream, sequenceDictionary);
|
||||
bufferedStream = new SeekableBufferedStream(baiStream, BUFFERED_STREAM_BUFFER_SIZE);
|
||||
fileLength=bufferedStream.length();
|
||||
}
|
||||
catch (IOException exc) {
|
||||
throw new ReviewedGATKException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
|
||||
}
|
||||
}
|
||||
|
||||
private void closeIndexFile() {
|
||||
try {
|
||||
bufferedStream.close();
|
||||
baiStream.close();
|
||||
fileStream.close();
|
||||
fileLength = -1;
|
||||
}
|
||||
catch (IOException exc) {
|
||||
throw new ReviewedGATKException("Unable to close index file " + mFile, exc);
|
||||
}
|
||||
}
|
||||
|
||||
private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8;
|
||||
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
|
||||
|
||||
private byte[] readBytes(int count) {
|
||||
ByteBuffer buffer = getBuffer(count);
|
||||
read(buffer);
|
||||
buffer.flip();
|
||||
byte[] contents = new byte[count];
|
||||
buffer.get(contents);
|
||||
return contents;
|
||||
}
|
||||
|
||||
private int readInteger() {
|
||||
ByteBuffer buffer = getBuffer(INT_SIZE_IN_BYTES);
|
||||
read(buffer);
|
||||
buffer.flip();
|
||||
return buffer.getInt();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads an array of <count> longs from the file channel, returning the results as an array.
|
||||
* @param count Number of longs to read.
|
||||
* @return An array of longs. Size of array should match count.
|
||||
*/
|
||||
private long[] readLongs(final int count) {
|
||||
ByteBuffer buffer = getBuffer(count*LONG_SIZE_IN_BYTES);
|
||||
read(buffer);
|
||||
buffer.flip();
|
||||
long[] result = new long[count];
|
||||
for(int i = 0; i < count; i++)
|
||||
result[i] = buffer.getLong();
|
||||
return result;
|
||||
}
|
||||
|
||||
private void read(final ByteBuffer buffer) {
|
||||
final int bytesRequested = buffer.limit();
|
||||
if (bytesRequested == 0)
|
||||
return;
|
||||
|
||||
try {
|
||||
|
||||
//BufferedInputStream cannot read directly into a byte buffer, so we read into an array
|
||||
//and put the result into the bytebuffer after the if statement.
|
||||
|
||||
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
||||
// our buffer to -- if there isn't enough data in the file, the index
|
||||
// must be truncated or otherwise corrupt:
|
||||
if(bytesRequested > fileLength - bufferedStream.position()){
|
||||
throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " +
|
||||
"It's likely that this file is truncated or corrupt -- " +
|
||||
"Please try re-indexing the corresponding BAM file.",
|
||||
mFile));
|
||||
}
|
||||
|
||||
int bytesRead = bufferedStream.read(byteArray, 0, bytesRequested);
|
||||
|
||||
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
||||
// our buffer to -- if we encounter EOF (-1), the index
|
||||
// must be truncated or otherwise corrupt:
|
||||
if (bytesRead <= 0) {
|
||||
throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " +
|
||||
"It's likely that this file is truncated or corrupt -- " +
|
||||
"Please try re-indexing the corresponding BAM file.",
|
||||
mFile));
|
||||
}
|
||||
|
||||
if(bytesRead != bytesRequested)
|
||||
throw new RuntimeException("Read amount different from requested amount. This should not happen.");
|
||||
|
||||
buffer.put(byteArray, 0, bytesRequested);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedGATKException("Index: unable to read bytes from index file " + mFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A reusable buffer for use by this index generator.
|
||||
* TODO: Should this be a SoftReference?
|
||||
*/
|
||||
private ByteBuffer buffer = null;
|
||||
|
||||
//BufferedStream don't read into ByteBuffers, so we need this temporary array
|
||||
private byte[] byteArray=null;
|
||||
private ByteBuffer getBuffer(final int size) {
|
||||
if(buffer == null || buffer.capacity() < size) {
|
||||
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
|
||||
buffer = ByteBuffer.allocate(size);
|
||||
byteArray = new byte[size];
|
||||
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
}
|
||||
buffer.clear();
|
||||
buffer.limit(size);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
private void skipBytes(final int count) {
|
||||
try {
|
||||
|
||||
//try to skip forward the requested amount.
|
||||
long skipped = bufferedStream.skip(count);
|
||||
|
||||
if( skipped != count ) { //if not managed to skip the requested amount
|
||||
throw new ReviewedGATKException("Index: unable to reposition file channel of index file " + mFile);
|
||||
}
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedGATKException("Index: unable to reposition file channel of index file " + mFile);
|
||||
}
|
||||
}
|
||||
|
||||
private void seek(final long position) {
|
||||
try {
|
||||
//to seek a new position, move the fileChannel, and reposition the bufferedStream
|
||||
bufferedStream.seek(position);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedGATKException("Index: unable to reposition of file channel of index file " + mFile);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the position from the current file channel.
|
||||
* @return position of the current file channel.
|
||||
*/
|
||||
private long position() {
|
||||
try {
|
||||
return bufferedStream.position();
|
||||
}
|
||||
catch (IOException exc) {
|
||||
throw new ReviewedGATKException("Unable to read position from index file " + mFile, exc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
* Copyright 2012-2016 Broad Institute, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||
|
||||
import htsjdk.samtools.*;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* This class implements BAM index querying API
|
||||
* by wrapping a class BrowseableBAMIndex from HTSJDK.
|
||||
*
|
||||
* @version 0.1
|
||||
*/
|
||||
public class GATKBAMIndexFromDataSource extends GATKBAMIndex {
|
||||
private File sourceFile;
|
||||
private SAMFileHeader sourceHeader;
|
||||
private BrowseableBAMIndex index;
|
||||
|
||||
public GATKBAMIndexFromDataSource(final File sourceFile, final SAMFileHeader sourceHeader, final BrowseableBAMIndex index) {
|
||||
this.sourceFile = sourceFile;
|
||||
this.sourceHeader = sourceHeader;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GATKBAMIndexData readReferenceSequence(final int referenceSequence) {
|
||||
final List<SAMSequenceRecord> sequences = sourceHeader.getSequenceDictionary().getSequences();
|
||||
if (referenceSequence >= sequences.size())
|
||||
throw new ReviewedGATKException("Sequence number " + referenceSequence + " cannot be greater or equal to " + sequences.size() + " in index file " + sourceFile);
|
||||
|
||||
|
||||
final BinList sourceBins = index.getBinsOverlapping(referenceSequence, 0, sequences.get(referenceSequence).getSequenceLength());
|
||||
|
||||
final List<GATKBin> bins = new ArrayList<>();
|
||||
for (Bin sourceBin : sourceBins) {
|
||||
final int indexBin = sourceBin.getBinNumber();
|
||||
while(indexBin >= bins.size())
|
||||
bins.add(null);
|
||||
|
||||
final GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
||||
final List<Chunk> chunks = index.getSpanOverlapping(sourceBin).getChunks();
|
||||
final List<GATKChunk> gatkChunks = new ArrayList<>(chunks.size());
|
||||
for (Chunk chunk : chunks) {
|
||||
gatkChunks.add(new GATKChunk(chunk));
|
||||
}
|
||||
|
||||
bin.setChunkList(gatkChunks.toArray(new GATKChunk[gatkChunks.size()]));
|
||||
|
||||
bins.set(indexBin, bin);
|
||||
}
|
||||
|
||||
// there is no interface to get linear index from HTSJDK
|
||||
final LinearIndex linearIndex = new LinearIndex(referenceSequence, 0, new long[]{});
|
||||
|
||||
return new GATKBAMIndexData(this,referenceSequence,bins,linearIndex);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getLevelSize(int levelNumber) {
|
||||
return index.getLevelSize(levelNumber);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getLevelForBin(Bin bin) {
|
||||
return index.getLevelForBin(bin);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getFirstLocusInBin(Bin bin) {
|
||||
return index.getFirstLocusInBin(bin);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getLastLocusInBin(Bin bin) {
|
||||
return index.getLastLocusInBin(bin);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getStartOfLastLinearBin() {
|
||||
return index.getStartOfLastLinearBin();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,435 @@
|
|||
/*
|
||||
* Copyright 2012-2016 Broad Institute, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
|
||||
import htsjdk.samtools.seekablestream.SeekableFileStream;
|
||||
import htsjdk.samtools.seekablestream.SeekableStream;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* This class implements BAM index querying API
|
||||
* by reading .bai file directly.
|
||||
* Very much not thread-safe.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class GATKBAMIndexFromFile extends GATKBAMIndex {
|
||||
/**
|
||||
* BAM index file magic number.
|
||||
*/
|
||||
private static final byte[] BAM_INDEX_MAGIC = "BAI\1".getBytes();
|
||||
|
||||
private final SAMSequenceDictionary sequenceDictionary;
|
||||
private final File mFile;
|
||||
|
||||
//TODO: figure out a good value for this buffer size
|
||||
private static final int BUFFERED_STREAM_BUFFER_SIZE = 8192;
|
||||
|
||||
/**
|
||||
* Number of sequences stored in this index.
|
||||
*/
|
||||
private final int sequenceCount;
|
||||
|
||||
/**
|
||||
* A cache of the starting positions of the sequences.
|
||||
*/
|
||||
private final long[] sequenceStartCache;
|
||||
|
||||
private SeekableFileStream fileStream;
|
||||
private SeekableStream baiStream;
|
||||
private SeekableBufferedStream bufferedStream;
|
||||
private long fileLength;
|
||||
|
||||
public GATKBAMIndexFromFile(final File file, final SAMSequenceDictionary sequenceDictionary) {
|
||||
mFile = file;
|
||||
this.sequenceDictionary = sequenceDictionary;
|
||||
|
||||
// Open the file stream.
|
||||
openIndexFile();
|
||||
|
||||
// Verify the magic number.
|
||||
seek(0);
|
||||
final byte[] buffer = readBytes(4);
|
||||
if (!Arrays.equals(buffer, BAM_INDEX_MAGIC)) {
|
||||
throw new ReviewedGATKException("Invalid file header in BAM index " + mFile +
|
||||
": " + new String(buffer));
|
||||
}
|
||||
|
||||
seek(4);
|
||||
|
||||
sequenceCount = readInteger();
|
||||
|
||||
// Create a cache of the starting position of each sequence. Initialize it to -1.
|
||||
sequenceStartCache = new long[sequenceCount];
|
||||
for(int i = 1; i < sequenceCount; i++)
|
||||
sequenceStartCache[i] = -1;
|
||||
|
||||
// Seed the first element in the array with the current position.
|
||||
if(sequenceCount > 0)
|
||||
sequenceStartCache[0] = position();
|
||||
|
||||
closeIndexFile();
|
||||
}
|
||||
|
||||
public GATKBAMIndexData readReferenceSequence(final int referenceSequence) {
|
||||
openIndexFile();
|
||||
|
||||
if (referenceSequence >= sequenceCount)
|
||||
throw new ReviewedGATKException("Invalid sequence number " + referenceSequence + " in index file " + mFile);
|
||||
|
||||
skipToSequence(referenceSequence);
|
||||
|
||||
final int binCount = readInteger();
|
||||
List<GATKBin> bins = new ArrayList<>();
|
||||
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||
final int indexBin = readInteger();
|
||||
final int nChunks = readInteger();
|
||||
|
||||
final List<GATKChunk> chunks = new ArrayList<>(nChunks);
|
||||
final long[] rawChunkData = readLongs(nChunks*2);
|
||||
for (int ci = 0; ci < nChunks; ci++) {
|
||||
final long chunkBegin = rawChunkData[ci*2];
|
||||
final long chunkEnd = rawChunkData[ci*2+1];
|
||||
chunks.add(new GATKChunk(chunkBegin, chunkEnd));
|
||||
}
|
||||
final GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
||||
bin.setChunkList(chunks.toArray(new GATKChunk[chunks.size()]));
|
||||
while(indexBin >= bins.size())
|
||||
bins.add(null);
|
||||
bins.set(indexBin,bin);
|
||||
}
|
||||
|
||||
final int nLinearBins = readInteger();
|
||||
long[] linearIndexEntries = readLongs(nLinearBins);
|
||||
|
||||
final LinearIndex linearIndex = new LinearIndex(referenceSequence,0,linearIndexEntries);
|
||||
|
||||
closeIndexFile();
|
||||
|
||||
return new GATKBAMIndexData(this,referenceSequence,bins,linearIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the number of bins in the given level.
|
||||
* @param levelNumber Level number. 0-based.
|
||||
* @return The size (number of possible bins) of the given level.
|
||||
*/
|
||||
@Override
|
||||
public int getLevelSize(final int levelNumber) {
|
||||
if(levelNumber == getNumIndexLevels()-1)
|
||||
return MAX_BINS-LEVEL_STARTS[levelNumber]-1;
|
||||
else
|
||||
return LEVEL_STARTS[levelNumber+1]-LEVEL_STARTS[levelNumber];
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the level associated with the given bin number.
|
||||
* @param bin The bin for which to determine the level.
|
||||
* @return the level associated with the given bin number.
|
||||
*/
|
||||
@Override
|
||||
public int getLevelForBin(final Bin bin) {
|
||||
final GATKBin gatkBin = new GATKBin(bin);
|
||||
if(gatkBin.getBinNumber() >= MAX_BINS)
|
||||
throw new ReviewedGATKException("Tried to get level for invalid bin in index file " + mFile);
|
||||
for(int i = getNumIndexLevels()-1; i >= 0; i--) {
|
||||
if(gatkBin.getBinNumber() >= LEVEL_STARTS[i])
|
||||
return i;
|
||||
}
|
||||
throw new ReviewedGATKException("Unable to find correct bin for bin " + bin + " in index file " + mFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the first locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
@Override
|
||||
public int getFirstLocusInBin(final Bin bin) {
|
||||
final int level = getLevelForBin(bin);
|
||||
final int levelStart = LEVEL_STARTS[level];
|
||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||
return (new GATKBin(bin).getBinNumber() - levelStart)*(BIN_GENOMIC_SPAN /levelSize)+1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the last locus that this bin can index into.
|
||||
* @param bin The bin to test.
|
||||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
@Override
|
||||
public int getLastLocusInBin(final Bin bin) {
|
||||
final int level = getLevelForBin(bin);
|
||||
final int levelStart = LEVEL_STARTS[level];
|
||||
final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
|
||||
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use to get close to the unmapped reads at the end of a BAM file.
|
||||
* @return The file offset of the first record in the last linear bin, or -1
|
||||
* if there are no elements in linear bins (i.e. no mapped reads).
|
||||
*/
|
||||
@Override
|
||||
public long getStartOfLastLinearBin() {
|
||||
openIndexFile();
|
||||
|
||||
seek(4);
|
||||
|
||||
final int sequenceCount = readInteger();
|
||||
// Because no reads may align to the last sequence in the sequence dictionary,
|
||||
// grab the last element of the linear index for each sequence, and return
|
||||
// the last one from the last sequence that has one.
|
||||
long lastLinearIndexPointer = -1;
|
||||
for (int i = 0; i < sequenceCount; i++) {
|
||||
// System.out.println("# Sequence TID: " + i);
|
||||
final int nBins = readInteger();
|
||||
// System.out.println("# nBins: " + nBins);
|
||||
for (int j1 = 0; j1 < nBins; j1++) {
|
||||
// Skip bin #
|
||||
skipBytes(4);
|
||||
final int nChunks = readInteger();
|
||||
// Skip chunks
|
||||
skipBytes(16 * nChunks);
|
||||
}
|
||||
final int nLinearBins = readInteger();
|
||||
if (nLinearBins > 0) {
|
||||
// Skip to last element of list of linear bins
|
||||
skipBytes(8 * (nLinearBins - 1));
|
||||
lastLinearIndexPointer = readLongs(1)[0];
|
||||
}
|
||||
}
|
||||
|
||||
closeIndexFile();
|
||||
|
||||
return lastLinearIndexPointer;
|
||||
}
|
||||
|
||||
protected void skipToSequence(final int referenceSequence) {
|
||||
// Find the offset in the file of the last sequence whose position has been determined. Start here
|
||||
// when searching the sequence for the next value to read. (Note that sequenceStartCache[0] will always
|
||||
// be present, so no extra stopping condition is necessary.
|
||||
int sequenceIndex = referenceSequence;
|
||||
while(sequenceStartCache[sequenceIndex] == -1)
|
||||
sequenceIndex--;
|
||||
|
||||
// Advance to the most recently found position.
|
||||
seek(sequenceStartCache[sequenceIndex]);
|
||||
|
||||
for (int i = sequenceIndex; i < referenceSequence; i++) {
|
||||
sequenceStartCache[i] = position();
|
||||
// System.out.println("# Sequence TID: " + i);
|
||||
final int nBins = readInteger();
|
||||
// System.out.println("# nBins: " + nBins);
|
||||
for (int j = 0; j < nBins; j++) {
|
||||
/* final int bin = */
|
||||
readInteger();
|
||||
final int nChunks = readInteger();
|
||||
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
|
||||
skipBytes(16 * nChunks);
|
||||
}
|
||||
final int nLinearBins = readInteger();
|
||||
// System.out.println("# nLinearBins: " + nLinearBins);
|
||||
skipBytes(8 * nLinearBins);
|
||||
|
||||
}
|
||||
|
||||
sequenceStartCache[referenceSequence] = position();
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void openIndexFile() {
|
||||
try {
|
||||
fileStream = new SeekableFileStream(mFile);
|
||||
baiStream = SamIndexes.asBaiSeekableStreamOrNull(fileStream, sequenceDictionary);
|
||||
bufferedStream = new SeekableBufferedStream(baiStream, BUFFERED_STREAM_BUFFER_SIZE);
|
||||
fileLength=bufferedStream.length();
|
||||
}
|
||||
catch (IOException exc) {
|
||||
throw new ReviewedGATKException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
|
||||
}
|
||||
}
|
||||
|
||||
private void closeIndexFile() {
|
||||
try {
|
||||
bufferedStream.close();
|
||||
baiStream.close();
|
||||
fileStream.close();
|
||||
fileLength = -1;
|
||||
}
|
||||
catch (IOException exc) {
|
||||
throw new ReviewedGATKException("Unable to close index file " + mFile, exc);
|
||||
}
|
||||
}
|
||||
|
||||
private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8;
|
||||
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
|
||||
|
||||
private byte[] readBytes(int count) {
|
||||
final ByteBuffer buffer = getBuffer(count);
|
||||
read(buffer);
|
||||
buffer.flip();
|
||||
byte[] contents = new byte[count];
|
||||
buffer.get(contents);
|
||||
return contents;
|
||||
}
|
||||
|
||||
private int readInteger() {
|
||||
final ByteBuffer buffer = getBuffer(INT_SIZE_IN_BYTES);
|
||||
read(buffer);
|
||||
buffer.flip();
|
||||
return buffer.getInt();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads an array of <count> longs from the file channel, returning the results as an array.
|
||||
* @param count Number of longs to read.
|
||||
* @return An array of longs. Size of array should match count.
|
||||
*/
|
||||
private long[] readLongs(final int count) {
|
||||
final ByteBuffer buffer = getBuffer(count*LONG_SIZE_IN_BYTES);
|
||||
read(buffer);
|
||||
buffer.flip();
|
||||
long[] result = new long[count];
|
||||
for(int i = 0; i < count; i++)
|
||||
result[i] = buffer.getLong();
|
||||
return result;
|
||||
}
|
||||
|
||||
private void read(final ByteBuffer buffer) {
|
||||
final int bytesRequested = buffer.limit();
|
||||
if (bytesRequested == 0)
|
||||
return;
|
||||
|
||||
try {
|
||||
|
||||
//BufferedInputStream cannot read directly into a byte buffer, so we read into an array
|
||||
//and put the result into the bytebuffer after the if statement.
|
||||
|
||||
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
||||
// our buffer to -- if there isn't enough data in the file, the index
|
||||
// must be truncated or otherwise corrupt:
|
||||
if(bytesRequested > fileLength - bufferedStream.position()){
|
||||
throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " +
|
||||
"It's likely that this file is truncated or corrupt -- " +
|
||||
"Please try re-indexing the corresponding BAM file.",
|
||||
mFile));
|
||||
}
|
||||
|
||||
int bytesRead = bufferedStream.read(byteArray, 0, bytesRequested);
|
||||
|
||||
// We have a rigid expectation here to read in exactly the number of bytes we've limited
|
||||
// our buffer to -- if we encounter EOF (-1), the index
|
||||
// must be truncated or otherwise corrupt:
|
||||
if (bytesRead <= 0) {
|
||||
throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " +
|
||||
"It's likely that this file is truncated or corrupt -- " +
|
||||
"Please try re-indexing the corresponding BAM file.",
|
||||
mFile));
|
||||
}
|
||||
|
||||
if(bytesRead != bytesRequested)
|
||||
throw new RuntimeException("Read amount different from requested amount. This should not happen.");
|
||||
|
||||
buffer.put(byteArray, 0, bytesRequested);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedGATKException("Index: unable to read bytes from index file " + mFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A reusable buffer for use by this index generator.
|
||||
* TODO: Should this be a SoftReference?
|
||||
*/
|
||||
private ByteBuffer buffer = null;
|
||||
|
||||
//BufferedStream don't read into ByteBuffers, so we need this temporary array
|
||||
private byte[] byteArray=null;
|
||||
private ByteBuffer getBuffer(final int size) {
|
||||
if(buffer == null || buffer.capacity() < size) {
|
||||
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
|
||||
buffer = ByteBuffer.allocate(size);
|
||||
byteArray = new byte[size];
|
||||
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
}
|
||||
buffer.clear();
|
||||
buffer.limit(size);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
private void skipBytes(final int count) {
|
||||
try {
|
||||
|
||||
//try to skip forward the requested amount.
|
||||
final long skipped = bufferedStream.skip(count);
|
||||
|
||||
if( skipped != count ) { //if not managed to skip the requested amount
|
||||
throw new ReviewedGATKException("Index: unable to reposition file channel of index file " + mFile);
|
||||
}
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedGATKException("Index: unable to reposition file channel of index file " + mFile);
|
||||
}
|
||||
}
|
||||
|
||||
private void seek(final long position) {
|
||||
try {
|
||||
//to seek a new position, move the fileChannel, and reposition the bufferedStream
|
||||
bufferedStream.seek(position);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedGATKException("Index: unable to reposition of file channel of index file " + mFile);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the position from the current file channel.
|
||||
* @return position of the current file channel.
|
||||
*/
|
||||
private long position() {
|
||||
try {
|
||||
return bufferedStream.position();
|
||||
}
|
||||
catch (IOException exc) {
|
||||
throw new ReviewedGATKException("Unable to read position from index file " + mFile, exc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -28,7 +28,10 @@ package org.broadinstitute.gatk.engine.datasources.reads;
|
|||
import htsjdk.samtools.MergingSamRecordIterator;
|
||||
import htsjdk.samtools.SamFileHeaderMerger;
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
|
||||
import htsjdk.samtools.sra.SRAAccession;
|
||||
import htsjdk.samtools.sra.SRAIndexedSequenceFile;
|
||||
import htsjdk.samtools.util.CloseableIterator;
|
||||
import htsjdk.samtools.util.CloserUtil;
|
||||
import htsjdk.samtools.util.RuntimeIOException;
|
||||
|
|
@ -51,7 +54,6 @@ import org.broadinstitute.gatk.utils.interval.IntervalMergingRule;
|
|||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIterator;
|
||||
import org.broadinstitute.gatk.utils.iterators.GATKSAMIteratorAdapter;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecordIterator;
|
||||
import org.broadinstitute.gatk.utils.sam.SAMReaderID;
|
||||
|
||||
|
|
@ -310,7 +312,7 @@ public class SAMDataSource {
|
|||
|
||||
// Determine the sort order.
|
||||
for(SAMReaderID readerID: readerIDs) {
|
||||
if (! readerID.getSamFile().canRead() )
|
||||
if (!SRAAccession.isValid(readerID.getSamFile().getPath()) && !readerID.getSamFile().canRead() )
|
||||
throw new UserException.CouldNotReadInputFile(readerID.getSamFile(),"file is not present or user does not have appropriate permissions. " +
|
||||
"Please check that the file is present and readable and try again.");
|
||||
|
||||
|
|
@ -377,15 +379,28 @@ public class SAMDataSource {
|
|||
if (referenceFile == null) {
|
||||
samSequenceDictionary = mergedHeader.getSequenceDictionary();
|
||||
} else {
|
||||
samSequenceDictionary = ReferenceSequenceFileFactory.
|
||||
getReferenceSequenceFile(referenceFile).
|
||||
getSequenceDictionary();
|
||||
ReferenceSequenceFile ref;
|
||||
// maybe it is SRA file?
|
||||
if (SRAAccession.isValid(referenceFile.getPath())) {
|
||||
ref = new SRAIndexedSequenceFile(new SRAAccession(referenceFile.getPath()));
|
||||
} else {
|
||||
ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile);
|
||||
}
|
||||
samSequenceDictionary = ref.getSequenceDictionary();
|
||||
}
|
||||
|
||||
for(SAMReaderID id: readerIDs) {
|
||||
File indexFile = findIndexFile(id.getSamFile());
|
||||
if(indexFile != null)
|
||||
bamIndices.put(id,new GATKBAMIndex(indexFile, samSequenceDictionary));
|
||||
if(indexFile != null) {
|
||||
bamIndices.put(id, new GATKBAMIndexFromFile(indexFile, samSequenceDictionary));
|
||||
continue;
|
||||
}
|
||||
|
||||
// if index file is not found, try SamReader indexing interface
|
||||
SamReader reader = readers.getReader(id);
|
||||
if (reader.indexing().hasBrowseableIndex()) {
|
||||
bamIndices.put(id, new GATKBAMIndexFromDataSource(id.getSamFile(), reader.getFileHeader(), reader.indexing().getBrowseableIndex()));
|
||||
}
|
||||
}
|
||||
|
||||
resourcePool.releaseReaders(readers);
|
||||
|
|
@ -1122,12 +1137,17 @@ public class SAMDataSource {
|
|||
try {
|
||||
if (threadAllocation.getNumIOThreads() > 0)
|
||||
blockInputStream = new BlockInputStream(dispatcher,readerID,false);
|
||||
reader = SamReaderFactory.makeDefault()
|
||||
.referenceSequence(referenceFile)
|
||||
SamReaderFactory factory = SamReaderFactory.makeDefault()
|
||||
.validationStringency(validationStringency)
|
||||
.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
|
||||
.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false);
|
||||
|
||||
if (SRAAccession.isValid(readerID.getSamFile().getPath())) {
|
||||
reader = factory.open(SamInputResource.of(new SRAAccession(readerID.getSamFile().getPath())));
|
||||
} else {
|
||||
reader = factory.referenceSequence(referenceFile)
|
||||
.setOption(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, true)
|
||||
.open(readerID.getSamFile());
|
||||
}
|
||||
|
||||
} catch ( RuntimeIOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(readerID.getSamFile(), e);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.gatk.engine.datasources.reads.utilities;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.gatk.utils.commandline.Input;
|
||||
|
|
@ -88,12 +89,12 @@ public class FindLargeShards extends CommandLineProgram {
|
|||
@Override
|
||||
public int execute() throws IOException {
|
||||
// initialize reference
|
||||
IndexedFastaSequenceFile refReader = new IndexedFastaSequenceFile(referenceFile);
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(refReader);
|
||||
final ReferenceSequenceFile refReader = new IndexedFastaSequenceFile(referenceFile);
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(refReader);
|
||||
|
||||
// initialize reads
|
||||
List<SAMReaderID> bamReaders = ListFileUtils.unpackBAMFileList(samFiles,parser);
|
||||
SAMDataSource dataSource = new SAMDataSource(referenceFile, bamReaders, new ThreadAllocation(), null, genomeLocParser);
|
||||
final SAMDataSource dataSource = new SAMDataSource(referenceFile, bamReaders, new ThreadAllocation(), null, genomeLocParser);
|
||||
|
||||
// intervals
|
||||
final GenomeLocSortedSet intervalSortedSet;
|
||||
|
|
@ -124,12 +125,12 @@ public class FindLargeShards extends CommandLineProgram {
|
|||
}
|
||||
|
||||
// Print out the stddev: (sum(x^2) - (1/N)*sum(x)^2)/N
|
||||
long mean = sum.divide(BigInteger.valueOf(numberOfShards)).longValue();
|
||||
long stddev = (long)(Math.sqrt(sumOfSquares.subtract(sum.pow(2).divide(BigInteger.valueOf(numberOfShards))).divide(BigInteger.valueOf(numberOfShards)).doubleValue()));
|
||||
final long mean = sum.divide(BigInteger.valueOf(numberOfShards)).longValue();
|
||||
final long stddev = (long)(Math.sqrt(sumOfSquares.subtract(sum.pow(2).divide(BigInteger.valueOf(numberOfShards))).divide(BigInteger.valueOf(numberOfShards)).doubleValue()));
|
||||
logger.info(String.format("Number of shards: %d; mean uncompressed size = %d; stddev uncompressed size = %d%n",numberOfShards,mean,stddev));
|
||||
|
||||
// Crank through the shards again, this time reporting on the shards significantly larger than the mean.
|
||||
long threshold = mean + stddev*5;
|
||||
final long threshold = mean + stddev*5;
|
||||
logger.warn(String.format("PROGRESS: Searching for large shards: Contig\tRegion.Start\tRegion.Stop\tSize"));
|
||||
out.printf("Contig\tRegion.Start\tRegion.Stop\tSize%n");
|
||||
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.datasources.reference;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.SAMSequenceRecord;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.engine.datasources.reads.LocusShard;
|
||||
import org.broadinstitute.gatk.engine.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.gatk.engine.datasources.reads.Shard;
|
||||
|
|
@ -46,7 +46,7 @@ import java.util.List;
|
|||
* Looks for fai and dict files, and tries to create them if they don't exist
|
||||
*/
|
||||
public class ReferenceDataSource {
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private ReferenceSequenceFile reference;
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
|
||||
|
|
@ -55,7 +55,7 @@ public class ReferenceDataSource {
|
|||
* Create reference data source from fasta file
|
||||
* @param fastaFile Fasta file to be used as reference
|
||||
*/
|
||||
public ReferenceDataSource(File fastaFile) {
|
||||
public ReferenceDataSource(final File fastaFile) {
|
||||
reference = CachingIndexedFastaSequenceFile.checkAndCreate(fastaFile);
|
||||
}
|
||||
|
||||
|
|
@ -63,7 +63,7 @@ public class ReferenceDataSource {
|
|||
* Get indexed fasta file
|
||||
* @return IndexedFastaSequenceFile that was created from file
|
||||
*/
|
||||
public IndexedFastaSequenceFile getReference() {
|
||||
public ReferenceSequenceFile getReference() {
|
||||
return this.reference;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.executive;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.tribble.TribbleException;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.datasources.reads.SAMDataSource;
|
||||
|
|
@ -111,7 +111,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
protected HierarchicalMicroScheduler(final GenomeAnalysisEngine engine,
|
||||
final Walker walker,
|
||||
final SAMDataSource reads,
|
||||
final IndexedFastaSequenceFile reference,
|
||||
final ReferenceSequenceFile reference,
|
||||
final Collection<ReferenceOrderedDataSource> rods,
|
||||
final ThreadAllocation threadAllocation) {
|
||||
super(engine, walker, reads, reference, rods, threadAllocation);
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.executive;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.gatk.engine.datasources.providers.ReadShardDataProvider;
|
||||
|
|
@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
protected LinearMicroScheduler(final GenomeAnalysisEngine engine,
|
||||
final Walker walker,
|
||||
final SAMDataSource reads,
|
||||
final IndexedFastaSequenceFile reference,
|
||||
final ReferenceSequenceFile reference,
|
||||
final Collection<ReferenceOrderedDataSource> rods,
|
||||
final ThreadAllocation threadAllocation) {
|
||||
super(engine, walker, reads, reference, rods, threadAllocation);
|
||||
|
|
@ -84,7 +84,6 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
Accumulator accumulator = Accumulator.create(engine,walker);
|
||||
|
||||
boolean done = walker.isDone();
|
||||
int counter = 0;
|
||||
|
||||
final TraversalEngine traversalEngine = borrowTraversalEngine(this);
|
||||
for (Shard shard : shardStrategy ) {
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
package org.broadinstitute.gatk.engine.executive;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.ReadMetrics;
|
||||
|
|
@ -104,7 +104,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
*/
|
||||
protected final GenomeAnalysisEngine engine;
|
||||
|
||||
protected final IndexedFastaSequenceFile reference;
|
||||
protected final ReferenceSequenceFile reference;
|
||||
|
||||
private final SAMDataSource reads;
|
||||
protected final Collection<ReferenceOrderedDataSource> rods;
|
||||
|
|
@ -131,7 +131,8 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
*
|
||||
* @return The best-fit microscheduler.
|
||||
*/
|
||||
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, ThreadAllocation threadAllocation) {
|
||||
public static MicroScheduler create(final GenomeAnalysisEngine engine, final Walker walker, final SAMDataSource reads, final ReferenceSequenceFile reference,
|
||||
final Collection<ReferenceOrderedDataSource> rods, final ThreadAllocation threadAllocation) {
|
||||
if ( threadAllocation.isRunningInParallelMode() ) {
|
||||
logger.info(String.format("Running the GATK in parallel mode with %d total threads, " +
|
||||
"%d CPU thread(s) for each of %d data thread(s), of %d processors available on this machine",
|
||||
|
|
@ -183,7 +184,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
protected MicroScheduler(final GenomeAnalysisEngine engine,
|
||||
final Walker walker,
|
||||
final SAMDataSource reads,
|
||||
final IndexedFastaSequenceFile reference,
|
||||
final ReferenceSequenceFile reference,
|
||||
final Collection<ReferenceOrderedDataSource> rods,
|
||||
final ThreadAllocation threadAllocation) {
|
||||
this.engine = engine;
|
||||
|
|
@ -397,7 +398,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
* Returns the reference maintained by this scheduler.
|
||||
* @return The reference maintained by this scheduler.
|
||||
*/
|
||||
public IndexedFastaSequenceFile getReference() { return reference; }
|
||||
public ReferenceSequenceFile getReference() { return reference; }
|
||||
|
||||
protected void cleanup() {
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.filters;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.WalkerManager;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
|
|
@ -41,7 +41,7 @@ import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
|||
*/
|
||||
public class BAQReadTransformer extends ReadTransformer {
|
||||
private BAQ baqHMM;
|
||||
private IndexedFastaSequenceFile refReader;
|
||||
private ReferenceSequenceFile refReader;
|
||||
private BAQ.CalculationMode cmode;
|
||||
private BAQ.QualityMode qmode;
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.gatk.engine.walkers;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.tribble.Feature;
|
||||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
|
|
@ -181,7 +182,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
|||
// Map over the ActiveRegion
|
||||
public abstract MapType map(final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
|
||||
|
||||
public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, IndexedFastaSequenceFile reference ) {
|
||||
public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, final ReferenceSequenceFile reference ) {
|
||||
final int activeRegionExtension = this.getClass().getAnnotation(ActiveRegionTraversalParameters.class).extension();
|
||||
final List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
|
||||
for( final GenomeLoc interval : intervals.toList() ) {
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||
import org.broadinstitute.gatk.utils.ValidationExclusion;
|
||||
|
|
@ -82,7 +82,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
// Test the accuracy of the read metrics
|
||||
|
||||
private File referenceFile;
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private ReferenceSequenceFile reference;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
private SAMFileHeader header;
|
||||
private GATKSAMReadGroupRecord readGroup;
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.tribble.Feature;
|
||||
import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||
|
|
@ -71,7 +72,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
/**
|
||||
* Sequence file.
|
||||
*/
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
private static ReferenceSequenceFile seq;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
package org.broadinstitute.gatk.engine.datasources.providers;
|
||||
|
||||
import htsjdk.samtools.SAMSequenceRecord;
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
|
|
@ -57,7 +57,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
|
|||
/**
|
||||
* The fasta, for comparison.
|
||||
*/
|
||||
protected IndexedFastaSequenceFile sequenceFile = null;
|
||||
protected ReferenceSequenceFile sequenceFile = null;
|
||||
protected GenomeLocParser genomeLocParser = null;
|
||||
|
||||
//
|
||||
|
|
|
|||
|
|
@ -25,9 +25,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.GATKBAMFileSpan;
|
||||
import htsjdk.samtools.GATKChunk;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
|
|
@ -45,7 +45,7 @@ import java.io.FileNotFoundException;
|
|||
*
|
||||
*/
|
||||
public class FilePointerUnitTest extends BaseTest {
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private SAMReaderID readerID = new SAMReaderID("samFile",new Tags());
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Copyright 2012-2016 Broad Institute, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||
|
||||
import htsjdk.samtools.SAMFileReader;
|
||||
import htsjdk.samtools.SamReader;
|
||||
import htsjdk.samtools.SamReaderFactory;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Test basic functionality in the GATK's wrapper around htsjdk.samtools.BrowseableBAMIndex.
|
||||
*/
|
||||
public class GATKBAMIndexFromDataSourceUnitTest extends BaseTest {
|
||||
private static File bamFile = new File(validationDataLocation+"MV1994.selected.bam");
|
||||
|
||||
/**
|
||||
* Storage for the index itself.
|
||||
*/
|
||||
private GATKBAMIndex bamIndex;
|
||||
|
||||
@BeforeClass
|
||||
public void init() throws IOException {
|
||||
final SAMFileReader reader = new SAMFileReader(bamFile);
|
||||
reader.enableIndexCaching(true); // needed ot get BrowseableBAMIndex
|
||||
Assert.assertTrue(reader.hasIndex());
|
||||
Assert.assertTrue(reader.indexing().hasBrowseableIndex());
|
||||
|
||||
bamIndex = new GATKBAMIndexFromDataSource(bamFile, reader.getFileHeader(), reader.indexing().getBrowseableIndex());
|
||||
reader.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNumberAndSizeOfIndexLevels() {
|
||||
// The correct values for this test are pulled directly from the
|
||||
// SAM Format Specification v1.3-r882, Section 4.1.1, last paragraph.
|
||||
Assert.assertEquals(GATKBAMIndex.getNumIndexLevels(),6,"Incorrect number of levels in BAM index");
|
||||
|
||||
// Level 0
|
||||
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(0),0);
|
||||
Assert.assertEquals(bamIndex.getLevelSize(0),1);
|
||||
|
||||
// Level 1
|
||||
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(1),1);
|
||||
Assert.assertEquals(bamIndex.getLevelSize(1),8-1+1);
|
||||
|
||||
// Level 2
|
||||
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(2),9);
|
||||
Assert.assertEquals(bamIndex.getLevelSize(2),72-9+1);
|
||||
|
||||
// Level 3
|
||||
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(3),73);
|
||||
Assert.assertEquals(bamIndex.getLevelSize(3),584-73+1);
|
||||
|
||||
// Level 4
|
||||
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(4),585);
|
||||
Assert.assertEquals(bamIndex.getLevelSize(4),4680-585+1);
|
||||
|
||||
// Level 5
|
||||
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(5),4681);
|
||||
// Need to wait unitl AbstractBAMFileIndex.getLevelSize() is fixed, will throw an ArrayIndexOutOfBoundsException if 1 before the end of the GenomicIndexUtil.LEVEL_STARTS array
|
||||
//Assert.assertEquals(bamIndex.getLevelSize(5),37448-4681+1);
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = ReviewedGATKException.class )
|
||||
public void testTooManyReferenceSequencesDataSource() {
|
||||
final GATKBAMIndexData index = bamIndex.readReferenceSequence(1);
|
||||
}
|
||||
}
|
||||
|
|
@ -39,7 +39,7 @@ import java.io.FileNotFoundException;
|
|||
/**
|
||||
* Test basic functionality in the GATK's implementation of the BAM index classes.
|
||||
*/
|
||||
public class GATKBAMIndexUnitTest extends BaseTest {
|
||||
public class GATKBAMIndexFromFileUnitTest extends BaseTest {
|
||||
private static File bamFile = new File(validationDataLocation+"MV1994.selected.bam");
|
||||
|
||||
/**
|
||||
|
|
@ -60,11 +60,11 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
|||
|
||||
@BeforeClass
|
||||
public void init() throws FileNotFoundException {
|
||||
SAMFileReader reader = new SAMFileReader(bamFile);
|
||||
this.sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
|
||||
final SAMFileReader reader = new SAMFileReader(bamFile);
|
||||
sequenceDictionary = reader.getFileHeader().getSequenceDictionary();
|
||||
reader.close();
|
||||
|
||||
bamIndex = new GATKBAMIndex(bamIndexFile, sequenceDictionary);
|
||||
|
||||
bamIndex = new GATKBAMIndexFromFile(bamIndexFile, sequenceDictionary);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -100,13 +100,13 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
|||
|
||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||
public void testDetectTruncatedBamIndexWordBoundary() {
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"), sequenceDictionary);
|
||||
final GATKBAMIndex index = new GATKBAMIndexFromFile(new File(privateTestDir + "truncated_at_word_boundary.bai"), sequenceDictionary);
|
||||
index.readReferenceSequence(0);
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||
public void testDetectTruncatedBamIndexNonWordBoundary() {
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"), sequenceDictionary);
|
||||
public void testDetectTruncatedBamIndeFromFilexNonWordBoundary() {
|
||||
final GATKBAMIndex index = new GATKBAMIndexFromFile(new File(privateTestDir + "truncated_at_non_word_boundary.bai"), sequenceDictionary);
|
||||
index.readReferenceSequence(0);
|
||||
}
|
||||
|
||||
|
|
@ -25,8 +25,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.datasources.reads;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||
import org.broadinstitute.gatk.utils.ValidationExclusion;
|
||||
|
|
@ -64,7 +64,7 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
|
||||
private List<SAMReaderID> readers;
|
||||
private File referenceFile;
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.datasources.rmd;
|
||||
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||
import org.broadinstitute.gatk.utils.refdata.tracks.RMDTrackBuilder;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -68,7 +69,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
private RMDTriplet triplet = null;
|
||||
private RMDTrackBuilder builder = null;
|
||||
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
private GenomeLoc testSite1;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.datasources.rmd;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.tribble.Feature;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||
|
|
@ -46,7 +46,7 @@ public class ReferenceOrderedQueryDataPoolUnitTest extends BaseTest{
|
|||
// Build up query parameters
|
||||
File file = new File(BaseTest.privateTestDir + "NA12878.hg19.example1.vcf");
|
||||
RMDTriplet triplet = new RMDTriplet("test", "VCF", file.getAbsolutePath(), RMDTriplet.RMDStorageType.FILE, new Tags());
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||
GenomeLocParser parser = new GenomeLocParser(seq);
|
||||
GenomeLoc loc = parser.createGenomeLoc("20", 1, 100000);
|
||||
TestRMDTrackBuilder builder = new TestRMDTrackBuilder(seq.getSequenceDictionary(), parser);
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.traversals;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.sam.ArtificialBAMBuilder;
|
||||
|
|
@ -43,7 +43,7 @@ import java.util.List;
|
|||
|
||||
public class TAROrderedReadCacheUnitTest extends BaseTest {
|
||||
// example fasta index file, can be deleted if you don't use the reference
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public void setup() throws FileNotFoundException {
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.gatk.engine.traversals;
|
|||
|
||||
import com.google.java.contract.PreconditionError;
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||
import org.broadinstitute.gatk.utils.ValidationExclusion;
|
||||
import org.broadinstitute.gatk.engine.datasources.reads.*;
|
||||
|
|
@ -39,7 +40,6 @@ import org.broadinstitute.gatk.utils.activeregion.ActiveRegionReadState;
|
|||
import org.broadinstitute.gatk.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.gatk.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.*;
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.datasources.providers.LocusShardDataProvider;
|
||||
|
|
@ -80,7 +80,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private File referenceFile;
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private ReferenceSequenceFile reference;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package org.broadinstitute.gatk.engine.traversals;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.engine.walkers.TestCountReadsWalker;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
|
|
@ -90,7 +89,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
private File output;
|
||||
private TraverseReadsNano traversalEngine = null;
|
||||
|
||||
private IndexedFastaSequenceFile ref = null;
|
||||
private ReferenceSequenceFile ref = null;
|
||||
private GenomeLocParser genomeLocParser = null;
|
||||
private GenomeAnalysisEngine engine = null;
|
||||
|
||||
|
|
|
|||
|
|
@ -418,6 +418,7 @@
|
|||
<queuetest.run>${gatk.queuetests.run}</queuetest.run>
|
||||
<java.io.tmpdir>${java.io.tmpdir}</java.io.tmpdir>
|
||||
</systemPropertyVariables>
|
||||
<groups>sra</groups>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.qc;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequence;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.commandline.Output;
|
||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
|
|
@ -70,7 +70,7 @@ public class QCRef extends RefWalker<Integer, Integer> {
|
|||
|
||||
String contigName = "";
|
||||
int contigStart, contigEnd;
|
||||
IndexedFastaSequenceFile uncachedRef;
|
||||
ReferenceSequenceFile uncachedRef;
|
||||
byte[] uncachedBases;
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.gatk.tools.walkers.varianteval;
|
|||
|
||||
import com.google.java.contract.Requires;
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.samtools.util.IntervalTree;
|
||||
import htsjdk.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
@ -282,7 +283,7 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this);
|
||||
|
||||
// Ancestral alignments
|
||||
private IndexedFastaSequenceFile ancestralAlignments = null;
|
||||
private ReferenceSequenceFile ancestralAlignments = null;
|
||||
|
||||
// The set of all possible evaluation contexts
|
||||
StratificationManager<VariantStratifier, EvaluationContext> stratManager;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.filters;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
|
|
@ -51,7 +51,7 @@ public class VariantFiltrationUnitTest extends BaseTest {
|
|||
@BeforeTest
|
||||
public void before() {
|
||||
// Create GenomeLoc
|
||||
IndexedFastaSequenceFile fasta = CachingIndexedFastaSequenceFile.checkAndCreate(new File(privateTestDir + "iupacFASTA.fasta"));
|
||||
ReferenceSequenceFile fasta = CachingIndexedFastaSequenceFile.checkAndCreate(new File(privateTestDir + "iupacFASTA.fasta"));
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(fasta);
|
||||
chr1 = fasta.getSequenceDictionary().getSequence(0).getSequenceName();
|
||||
genomeLoc = genomeLocParser.createGenomeLoc(chr1, 5, 10);
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.indels;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
||||
|
|
@ -44,7 +44,7 @@ public class IndelRealignerUnitTest extends BaseTest {
|
|||
|
||||
@BeforeClass
|
||||
public void setup() throws FileNotFoundException {
|
||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,143 @@
|
|||
/*
|
||||
* Copyright 2012-2016 Broad Institute, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.tools.walkers.readutils;
|
||||
|
||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||
import org.testng.SkipException;
|
||||
import org.testng.annotations.BeforeGroups;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
import htsjdk.samtools.sra.SRAAccession;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
|
||||
public class SRAPrintReadsIntegrationTest extends WalkerTest {
|
||||
|
||||
private static class PRTest {
|
||||
final String accession;
|
||||
final String args;
|
||||
final String md5;
|
||||
|
||||
private PRTest(String accession, String args, String md5) {
|
||||
this.accession = accession;
|
||||
this.args = args;
|
||||
this.md5 = md5;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("PRTest(accession='%s', args='%s')", accession, args);
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean canResolveNetworkAccession = false;
|
||||
private static String checkAccession = "SRR000123";
|
||||
private static final String REMOTE_ACCESSION_PATTERN = "^[SED]RR[0-9]{6,9}$";
|
||||
|
||||
/**
|
||||
* Are the SRA native libraries loaded and initialized? Does the test accession have a valid name?
|
||||
*/
|
||||
@BeforeGroups(groups = {"sra"})
|
||||
public final void checkIfCanResolve() {
|
||||
// Did SRA successfully load the native libraries and are fully initialized?
|
||||
if (!SRAAccession.isSupported()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Is this is a valid SRA accession?
|
||||
canResolveNetworkAccession = SRAAccession.isValid(checkAccession);
|
||||
}
|
||||
|
||||
/**
|
||||
* Are the SRA native libraries loaded and initialized?
|
||||
*
|
||||
* @throws SkipException if the SRA native libraries are loaded and initialized
|
||||
*/
|
||||
@BeforeMethod
|
||||
public final void assertSRAIsSupported(){
|
||||
if(!SRAAccession.isSupported()){
|
||||
throw new SkipException("Skipping SRA Test because SRA native code is unavailable.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Skip network SRA Test because cannot resolve remote SRA accession
|
||||
*
|
||||
* @param method Provides information about, and access to, a single method on a class or interface
|
||||
* @param params Method parameters
|
||||
* @throws SkipException if cannot resold an SRA accession
|
||||
*/
|
||||
@BeforeMethod
|
||||
public void skipIfCantResolve(Method method, Object[] params) {
|
||||
String accession = null;
|
||||
|
||||
if (params.length > 0) {
|
||||
Object firstParam = params[0];
|
||||
if (firstParam instanceof PRTest) {
|
||||
accession = ((PRTest)firstParam).accession;
|
||||
}
|
||||
}
|
||||
|
||||
if (accession != null &&
|
||||
accession.matches(REMOTE_ACCESSION_PATTERN) && !canResolveNetworkAccession) {
|
||||
throw new SkipException("Skipping network SRA Test because cannot resolve remote SRA accession '" +
|
||||
checkAccession + "'.");
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "PRTest")
|
||||
public Object[][] createPrintReadsTestData() {
|
||||
return new Object[][]{
|
||||
// Test with local SRA accessions
|
||||
{new PRTest(privateTestDir + "ERR1214757.sra", "", "173ed87acc794a704aa000c6ab5d63a8")},
|
||||
{new PRTest(privateTestDir + "ERR1214757.sra", " -L NC_000001.10:1-50000", "6bc055f028c49bcbca990857e57a6e4b")},
|
||||
{new PRTest(privateTestDir + "ERR1214757.sra", " -L NC_000001.10:500000-1000000", "ab545064b2314971cfae7486ff74d779")},
|
||||
// Tests with remote SRA accessions
|
||||
{new PRTest("ERR1214757", "", "173ed87acc794a704aa000c6ab5d63a8")},
|
||||
{new PRTest("ERR1214757", " -L NC_000001.10:1-50000", "6bc055f028c49bcbca990857e57a6e4b")},
|
||||
{new PRTest("ERR1214757", " -L NC_000001.10:500000-1000000", "ab545064b2314971cfae7486ff74d779")},
|
||||
};
|
||||
}
|
||||
|
||||
@Test(groups = "sra", dataProvider = "PRTest")
|
||||
public void testPrintReads(PRTest params) {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T PrintReads" +
|
||||
" -R " + params.accession +
|
||||
" -I " + params.accession +
|
||||
params.args +
|
||||
" --no_pg_tag" +
|
||||
" -o %s",
|
||||
Collections.singletonList(params.md5));
|
||||
executeTest("testPrintReads-"+params.args, spec).getFirst();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.gatk.utils.activeregion;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocSortedSet;
|
||||
|
|
@ -168,7 +168,7 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
/**
|
||||
* See #getActiveRegionReference but with padding == 0
|
||||
*/
|
||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
public byte[] getActiveRegionReference( final ReferenceSequenceFile referenceReader ) {
|
||||
return getActiveRegionReference(referenceReader, 0);
|
||||
}
|
||||
|
||||
|
|
@ -182,21 +182,21 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
* @return a non-null array of bytes holding the reference bases in referenceReader
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
public byte[] getActiveRegionReference( final ReferenceSequenceFile referenceReader, final int padding ) {
|
||||
return getReference(referenceReader, padding, extendedLoc);
|
||||
}
|
||||
|
||||
/**
|
||||
* See #getActiveRegionReference but using the span including regions not the extended span
|
||||
*/
|
||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
public byte[] getFullReference( final ReferenceSequenceFile referenceReader ) {
|
||||
return getFullReference(referenceReader, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* See #getActiveRegionReference but using the span including regions not the extended span
|
||||
*/
|
||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
public byte[] getFullReference( final ReferenceSequenceFile referenceReader, final int padding ) {
|
||||
return getReference(referenceReader, padding, spanIncludingReads);
|
||||
}
|
||||
|
||||
|
|
@ -211,7 +211,7 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
* @return a non-null array of bytes holding the reference bases in referenceReader
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
|
||||
public byte[] getReference( final ReferenceSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
|
||||
if ( referenceReader == null ) throw new IllegalArgumentException("referenceReader cannot be null");
|
||||
if ( padding < 0 ) throw new IllegalArgumentException("padding must be a positive integer but got " + padding);
|
||||
if ( genomeLoc == null ) throw new IllegalArgumentException("genomeLoc cannot be null");
|
||||
|
|
|
|||
|
|
@ -25,12 +25,12 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.baq;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequence;
|
||||
import htsjdk.samtools.CigarElement;
|
||||
import htsjdk.samtools.CigarOperator;
|
||||
import htsjdk.samtools.SAMRecord;
|
||||
import htsjdk.samtools.SAMUtils;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
|
|
@ -151,7 +151,7 @@ public class BAQ {
|
|||
* @param b band width
|
||||
* @param minBaseQual All bases with Q < minBaseQual are up'd to this value
|
||||
*/
|
||||
public BAQ(final double d, final double e, final int b, final byte minBaseQual, boolean includeClippedBases) {
|
||||
public BAQ(final double d, final double e, final int b, final byte minBaseQual, final boolean includeClippedBases) {
|
||||
cd = d; ce = e; cb = b;
|
||||
this.minBaseQual = minBaseQual;
|
||||
this.includeClippedBases = includeClippedBases;
|
||||
|
|
@ -521,7 +521,7 @@ public class BAQ {
|
|||
}
|
||||
}
|
||||
|
||||
public BAQCalculationResult calcBAQFromHMM(SAMRecord read, IndexedFastaSequenceFile refReader) {
|
||||
public BAQCalculationResult calcBAQFromHMM(final SAMRecord read, final ReferenceSequenceFile refReader) {
|
||||
// start is alignment start - band width / 2 - size of first I element, if there is one. Stop is similar
|
||||
int offset = getBandWidth() / 2;
|
||||
long readStart = includeClippedBases ? read.getUnclippedStart() : read.getAlignmentStart();
|
||||
|
|
@ -540,7 +540,7 @@ public class BAQ {
|
|||
// final SimpleTimer total = new SimpleTimer();
|
||||
// final SimpleTimer local = new SimpleTimer();
|
||||
// int n = 0;
|
||||
public BAQCalculationResult calcBAQFromHMM(byte[] ref, byte[] query, byte[] quals, int queryStart, int queryEnd ) {
|
||||
public BAQCalculationResult calcBAQFromHMM(final byte[] ref, final byte[] query, final byte[] quals, final int queryStart, final int queryEnd ) {
|
||||
// total.restart();
|
||||
if ( queryStart < 0 ) throw new ReviewedGATKException("BUG: queryStart < 0: " + queryStart);
|
||||
if ( queryEnd < 0 ) throw new ReviewedGATKException("BUG: queryEnd < 0: " + queryEnd);
|
||||
|
|
@ -564,7 +564,7 @@ public class BAQ {
|
|||
* @param read
|
||||
* @return
|
||||
*/
|
||||
private final Pair<Integer,Integer> calculateQueryRange(SAMRecord read) {
|
||||
private final Pair<Integer,Integer> calculateQueryRange(final SAMRecord read) {
|
||||
int queryStart = -1, queryStop = -1;
|
||||
int readI = 0;
|
||||
|
||||
|
|
@ -599,7 +599,7 @@ public class BAQ {
|
|||
}
|
||||
|
||||
// we need to pad ref by at least the bandwidth / 2 on either side
|
||||
public BAQCalculationResult calcBAQFromHMM(SAMRecord read, byte[] ref, int refOffset) {
|
||||
public BAQCalculationResult calcBAQFromHMM(final SAMRecord read, final byte[] ref, final int refOffset) {
|
||||
// todo -- need to handle the case where the cigar sum of lengths doesn't cover the whole read
|
||||
Pair<Integer, Integer> queryRange = calculateQueryRange(read);
|
||||
if ( queryRange == null ) return null; // read has Ns, or is completely clipped away
|
||||
|
|
@ -642,7 +642,7 @@ public class BAQ {
|
|||
return baqResult;
|
||||
}
|
||||
|
||||
public byte capBaseByBAQ( byte oq, byte bq, int state, int expectedPos ) {
|
||||
public byte capBaseByBAQ( final byte oq, final byte bq, final int state, final int expectedPos ) {
|
||||
byte b;
|
||||
boolean isIndel = stateIsIndel(state);
|
||||
int pos = stateAlignedPosition(state);
|
||||
|
|
@ -664,7 +664,7 @@ public class BAQ {
|
|||
* @param calculationType
|
||||
* @return BQ qualities for use, in case qmode is DONT_MODIFY
|
||||
*/
|
||||
public byte[] baqRead(SAMRecord read, IndexedFastaSequenceFile refReader, CalculationMode calculationType, QualityMode qmode ) {
|
||||
public byte[] baqRead(final SAMRecord read, final ReferenceSequenceFile refReader, final CalculationMode calculationType, final QualityMode qmode ) {
|
||||
if ( DEBUG ) System.out.printf("BAQ %s read %s%n", calculationType, read.getReadName());
|
||||
|
||||
byte[] BAQQuals = read.getBaseQualities(); // in general we are overwriting quals, so just get a pointer to them
|
||||
|
|
@ -706,7 +706,7 @@ public class BAQ {
|
|||
* @param read
|
||||
* @return
|
||||
*/
|
||||
public boolean excludeReadFromBAQ(SAMRecord read) {
|
||||
public boolean excludeReadFromBAQ(final SAMRecord read) {
|
||||
// keeping mapped reads, regardless of pairing status, or primary alignment status.
|
||||
return read.getReadUnmappedFlag() || read.getReadFailsVendorQualityCheckFlag() || read.getDuplicateReadFlag();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.fasta;
|
||||
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.samtools.sra.SRAAccession;
|
||||
import htsjdk.samtools.sra.SRAIndexedSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import htsjdk.samtools.SAMException;
|
||||
import htsjdk.samtools.reference.FastaSequenceIndex;
|
||||
|
|
@ -192,8 +195,17 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
* Possibly may be better as an overloaded constructor.
|
||||
* @param fastaFile Fasta file to be used as reference
|
||||
* @return A new instance of a CachingIndexedFastaSequenceFile.
|
||||
* @throws IllegalArgumentException if Fasta file is null
|
||||
*/
|
||||
public static CachingIndexedFastaSequenceFile checkAndCreate(final File fastaFile) {
|
||||
public static ReferenceSequenceFile checkAndCreate(final File fastaFile) {
|
||||
if ( fastaFile == null ) {
|
||||
throw new IllegalArgumentException("Fasta file is null");
|
||||
}
|
||||
// maybe it is SRA file?
|
||||
if (SRAAccession.isValid(fastaFile.getPath())) {
|
||||
return new SRAIndexedSequenceFile(new SRAAccession(fastaFile.getPath()));
|
||||
}
|
||||
|
||||
// does the fasta file exist? check that first...
|
||||
if (!fastaFile.exists())
|
||||
throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist.");
|
||||
|
|
|
|||
|
|
@ -25,10 +25,10 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.locusiterator;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.SAMFileReader;
|
||||
import htsjdk.samtools.SAMReadGroupRecord;
|
||||
import htsjdk.samtools.SAMRecordIterator;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.commandline.Argument;
|
||||
import org.broadinstitute.gatk.utils.commandline.CommandLineProgram;
|
||||
|
|
@ -62,7 +62,7 @@ public class LIBSPerformance extends CommandLineProgram {
|
|||
|
||||
@Override
|
||||
public int execute() throws IOException {
|
||||
final IndexedFastaSequenceFile reference = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
final ReferenceSequenceFile reference = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(reference);
|
||||
|
||||
final SAMFileReader reader = new SAMFileReader(samFile);
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.sam;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.*;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.NGSPlatform;
|
||||
|
||||
|
|
@ -52,7 +52,7 @@ import java.util.*;
|
|||
public class ArtificialBAMBuilder {
|
||||
public final static int BAM_SHARD_SIZE = 16384;
|
||||
|
||||
private final IndexedFastaSequenceFile reference;
|
||||
private final ReferenceSequenceFile reference;
|
||||
private final GenomeLocParser parser;
|
||||
|
||||
final int nReadsPerLocus;
|
||||
|
|
@ -73,7 +73,7 @@ public class ArtificialBAMBuilder {
|
|||
|
||||
SAMFileHeader header;
|
||||
|
||||
public ArtificialBAMBuilder(final IndexedFastaSequenceFile reference, int nReadsPerLocus, int nLoci) {
|
||||
public ArtificialBAMBuilder(final ReferenceSequenceFile reference, int nReadsPerLocus, int nLoci) {
|
||||
this.nReadsPerLocus = nReadsPerLocus;
|
||||
this.nLoci = nLoci;
|
||||
|
||||
|
|
@ -94,7 +94,7 @@ public class ArtificialBAMBuilder {
|
|||
createAndSetHeader(1);
|
||||
}
|
||||
|
||||
public IndexedFastaSequenceFile getReference() {
|
||||
public ReferenceSequenceFile getReference() {
|
||||
return reference;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.text;
|
||||
|
||||
import htsjdk.samtools.sra.SRAAccession;
|
||||
import org.broadinstitute.gatk.utils.commandline.ParsingEngine;
|
||||
import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
||||
import org.broadinstitute.gatk.utils.commandline.Tags;
|
||||
|
|
@ -59,6 +60,13 @@ public class ListFileUtils {
|
|||
* @return a flattened list of the bam files provided
|
||||
*/
|
||||
public static List<SAMReaderID> unpackBAMFileList(final List<String> samFiles, final ParsingEngine parser) {
|
||||
if ( samFiles == null ) {
|
||||
throw new IllegalArgumentException("The SAM files are null");
|
||||
}
|
||||
if ( parser == null ) {
|
||||
throw new IllegalArgumentException("The parser is null");
|
||||
}
|
||||
|
||||
List<SAMReaderID> unpackedReads = new ArrayList<SAMReaderID>();
|
||||
for( String inputFileName: samFiles ) {
|
||||
Tags inputFileNameTags = parser.getTags(inputFileName);
|
||||
|
|
@ -79,10 +87,14 @@ public class ListFileUtils {
|
|||
else if(inputFileName.endsWith("stdin")) {
|
||||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||
}
|
||||
else if(SRAAccession.isValid(inputFileName)) {
|
||||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||
}
|
||||
else {
|
||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM/CRAM files with the .bam/.cram extension and lists of BAM/CRAM files " +
|
||||
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM/CRAM file or list " +
|
||||
"of BAM/CRAM files is in the correct format, update the extension, and try again.",inputFileName));
|
||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM/CRAM files with the .bam/.cram extension " + "" +
|
||||
"or SRA archives and lists of BAM/CRAM/SRA files with the .list extension, but the file %s has " +
|
||||
"neither extension and is not SRA accession. Please ensure that your BAM/CRAM file or list " +
|
||||
"of BAM/CRAM files is in the correct format, update the extension, and try again.", inputFileName));
|
||||
}
|
||||
}
|
||||
return unpackedReads;
|
||||
|
|
|
|||
|
|
@ -29,10 +29,10 @@ package org.broadinstitute.gatk.utils;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.SAMFileReader;
|
||||
import htsjdk.samtools.SAMRecord;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -57,7 +57,7 @@ public class ExampleToCopyUnitTest extends BaseTest {
|
|||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
// example fasta index file, can be deleted if you don't use the reference
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public void setup() throws FileNotFoundException {
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.gatk.utils;
|
|||
|
||||
import com.google.caliper.Param;
|
||||
import com.google.caliper.SimpleBenchmark;
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -36,7 +36,7 @@ import java.io.File;
|
|||
* Caliper microbenchmark of genome loc parser
|
||||
*/
|
||||
public class GenomeLocParserBenchmark extends SimpleBenchmark {
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
private final int ITERATIONS = 1000000;
|
||||
|
||||
@Param({"NEW", "NONE"})
|
||||
|
|
|
|||
|
|
@ -28,10 +28,9 @@ package org.broadinstitute.gatk.utils;
|
|||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.SAMReadGroupRecord;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
||||
|
|
@ -53,7 +52,7 @@ public class NGSPlatformUnitTest extends BaseTest {
|
|||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
// example fasta index file, can be deleted if you don't use the reference
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public void setup() throws FileNotFoundException {
|
||||
|
|
|
|||
|
|
@ -29,8 +29,8 @@ package org.broadinstitute.gatk.utils.activeregion;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
|
|
@ -51,7 +51,7 @@ import java.util.*;
|
|||
public class ActiveRegionUnitTest extends BaseTest {
|
||||
private final static boolean DEBUG = false;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
private String contig;
|
||||
private int contigLength;
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.gatk.utils.baq;
|
||||
|
||||
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -52,7 +53,7 @@ public class BAQUnitTest extends BaseTest {
|
|||
private final int startChr = 1;
|
||||
private final int numChr = 2;
|
||||
private final int chrSize = 1000;
|
||||
IndexedFastaSequenceFile fasta = null;
|
||||
ReferenceSequenceFile fasta = null;
|
||||
|
||||
@BeforeMethod
|
||||
public void before() {
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ package org.broadinstitute.gatk.utils.fasta;
|
|||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequence;
|
||||
import htsjdk.samtools.SAMSequenceRecord;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.log4j.Priority;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
|
|
@ -93,7 +94,7 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
|||
private void testSequential(final CachingIndexedFastaSequenceFile caching, final File fasta, final int querySize) throws FileNotFoundException {
|
||||
Assert.assertTrue(caching.isPreservingCase(), "testSequential only works for case preserving CachingIndexedFastaSequenceFile readers");
|
||||
|
||||
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
||||
final ReferenceSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
||||
|
||||
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
|
||||
for ( int i = 0; i < contig.getSequenceLength(); i += STEP_SIZE ) {
|
||||
|
|
@ -123,7 +124,7 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
|||
// Tests grabbing sequences around a middle cached value.
|
||||
@Test(dataProvider = "fastas", enabled = true && ! DEBUG)
|
||||
public void testCachingIndexedFastaReaderTwoStage(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
|
||||
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
||||
final ReferenceSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
|
||||
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false);
|
||||
|
||||
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
|
||||
|
|
@ -191,7 +192,7 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
|||
// make sure some bases are lower case and some are upper case
|
||||
@Test(enabled = true)
|
||||
public void testMixedCasesInExample() throws FileNotFoundException, InterruptedException {
|
||||
final IndexedFastaSequenceFile original = new IndexedFastaSequenceFile(new File(exampleFASTA));
|
||||
final ReferenceSequenceFile original = new IndexedFastaSequenceFile(new File(exampleFASTA));
|
||||
final CachingIndexedFastaSequenceFile casePreserving = new CachingIndexedFastaSequenceFile(new File(exampleFASTA), true);
|
||||
final CachingIndexedFastaSequenceFile allUpper = new CachingIndexedFastaSequenceFile(new File(exampleFASTA));
|
||||
|
||||
|
|
@ -208,9 +209,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
|||
Assert.assertTrue(nMixedCase > 0, "No mixed cases sequences found in file. Unexpected test state");
|
||||
}
|
||||
|
||||
private int testCases(final IndexedFastaSequenceFile original,
|
||||
final IndexedFastaSequenceFile casePreserving,
|
||||
final IndexedFastaSequenceFile allUpper,
|
||||
private int testCases(final ReferenceSequenceFile original,
|
||||
final ReferenceSequenceFile casePreserving,
|
||||
final ReferenceSequenceFile allUpper,
|
||||
final String contig, final int start, final int stop ) {
|
||||
final String orig = fetchBaseString(original, contig, start, stop);
|
||||
final String keptCase = fetchBaseString(casePreserving, contig, start, stop);
|
||||
|
|
@ -226,7 +227,7 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
private String fetchBaseString(final IndexedFastaSequenceFile reader, final String contig, final int start, final int stop) {
|
||||
private String fetchBaseString(final ReferenceSequenceFile reader, final String contig, final int start, final int stop) {
|
||||
if ( start == -1 )
|
||||
return new String(reader.getSequence(contig).getBases());
|
||||
else
|
||||
|
|
|
|||
|
|
@ -25,8 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.refdata.tracks;
|
||||
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.tribble.Feature;
|
||||
import htsjdk.tribble.FeatureCodec;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
|
|
@ -66,7 +65,7 @@ public class FeatureManagerUnitTest extends BaseTest {
|
|||
public void setup() {
|
||||
File referenceFile = new File(b36KGReference);
|
||||
try {
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
genomeLocParser = new GenomeLocParser(seq);
|
||||
manager = new FeatureManager();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
package org.broadinstitute.gatk.utils.refdata.tracks;
|
||||
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.tribble.Tribble;
|
||||
import htsjdk.tribble.index.Index;
|
||||
import htsjdk.tribble.util.LittleEndianOutputStream;
|
||||
|
|
@ -54,7 +54,7 @@ import java.nio.channels.FileChannel;
|
|||
*/
|
||||
public class RMDTrackBuilderUnitTest extends BaseTest {
|
||||
private RMDTrackBuilder builder;
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private ReferenceSequenceFile seq;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeMethod
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.refdata.utils;
|
||||
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.tribble.Feature;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
|
|
@ -43,7 +43,7 @@ public class FeatureToGATKFeatureIteratorUnitTest extends BaseTest {
|
|||
@SuppressWarnings("unchecked")
|
||||
public void testCloseFilePointers() throws IOException {
|
||||
final String chr = "20";
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||
GenomeLocParser parser = new GenomeLocParser(seq);
|
||||
File file = new File(privateTestDir + "NA12878.hg19.example1.vcf");
|
||||
VCFCodec codec = new VCFCodec();
|
||||
|
|
|
|||
|
|
@ -26,8 +26,8 @@
|
|||
package org.broadinstitute.gatk.utils.sam;
|
||||
|
||||
import htsjdk.samtools.SAMReadGroupRecord;
|
||||
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
|
|
@ -223,7 +223,7 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
@Test (enabled = true)
|
||||
public void testReadWithNsRefIndexInDeletion() throws FileNotFoundException {
|
||||
|
||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
||||
final int readLength = 76;
|
||||
|
||||
|
|
@ -239,7 +239,7 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
@Test (enabled = true)
|
||||
public void testReadWithNsRefAfterDeletion() throws FileNotFoundException {
|
||||
|
||||
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
|
||||
final int readLength = 76;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue