IndexedFastaSequenceFile is now in Picard; transitioning to that implementation.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3701 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b6af17b82d
commit
4995950d04
|
|
@ -23,15 +23,17 @@
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.fasta;
|
package net.sf.picard.reference;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSourceProgressListener;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSourceProgressListener;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import static org.broadinstitute.sting.utils.fasta.FastaSequenceIndexBuilder.Status.*;
|
import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.FastaSequenceIndex;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds FastaSequenceIndex from fasta file.
|
* Builds FastaSequenceIndex from fasta file.
|
||||||
* Produces fai file with same output as samtools faidx
|
* Produces fai file with same output as samtools faidx
|
||||||
|
|
@ -46,6 +48,7 @@ public class FastaSequenceIndexBuilder {
|
||||||
// vars that store information about the contig that is currently being read
|
// vars that store information about the contig that is currently being read
|
||||||
String contig;
|
String contig;
|
||||||
long location, size, bytesPerLine, basesPerLine, basesThisLine;
|
long location, size, bytesPerLine, basesPerLine, basesThisLine;
|
||||||
|
int thisSequenceIndex = 0;
|
||||||
|
|
||||||
// vars that keep loop state
|
// vars that keep loop state
|
||||||
byte lastByte = 0, currentByte = 0, nextByte = 0;
|
byte lastByte = 0, currentByte = 0, nextByte = 0;
|
||||||
|
|
@ -241,7 +244,7 @@ public class FastaSequenceIndexBuilder {
|
||||||
* Reset iterators and add contig to sequence index
|
* Reset iterators and add contig to sequence index
|
||||||
*/
|
*/
|
||||||
private void finishReadingContig(FastaSequenceIndex sequenceIndex) {
|
private void finishReadingContig(FastaSequenceIndex sequenceIndex) {
|
||||||
sequenceIndex.addIndexEntry(contig, location, size, (int) basesPerLine, (int) bytesPerLine);
|
sequenceIndex.add(new FastaSequenceIndexEntry(contig, location, size, (int) basesPerLine, (int) bytesPerLine, thisSequenceIndex++));
|
||||||
status = Status.NONE;
|
status = Status.NONE;
|
||||||
contig = "";
|
contig = "";
|
||||||
size = 0;
|
size = 0;
|
||||||
|
|
@ -271,11 +274,9 @@ public class FastaSequenceIndexBuilder {
|
||||||
faiFile.getAbsolutePath()), e);
|
faiFile.getAbsolutePath()), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator<FastaSequenceIndexEntry> iter = sequenceIndex.iterator();
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
while (iter.hasNext()) {
|
for(FastaSequenceIndexEntry entry: sequenceIndex) {
|
||||||
out.write(iter.next().toIndexFileLine());
|
out.write(toIndexFileLine(entry));
|
||||||
out.newLine();
|
out.newLine();
|
||||||
}
|
}
|
||||||
out.close();
|
out.close();
|
||||||
|
|
@ -284,4 +285,13 @@ public class FastaSequenceIndexBuilder {
|
||||||
throw new StingException(String.format("An error occurred while writing file %s", e));
|
throw new StingException(String.format("An error occurred while writing file %s", e));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print string in format of fai file line
|
||||||
|
* @return Contig as one line in a fai file
|
||||||
|
*/
|
||||||
|
private static String toIndexFileLine(FastaSequenceIndexEntry entry) {
|
||||||
|
return String.format("%s\t%d\t%d\t%d\t%d", entry.getContig(), entry.getSize(), entry.getLocation(), entry.getBasesPerLine(), entry.getBytesPerLine());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -4,12 +4,12 @@ import org.broadinstitute.sting.alignment.Aligner;
|
||||||
import org.broadinstitute.sting.alignment.Alignment;
|
import org.broadinstitute.sting.alignment.Alignment;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A test harness to ensure that the perfect aligner works.
|
* A test harness to ensure that the perfect aligner works.
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||||
|
|
@ -9,6 +8,8 @@ import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Presents data sharded by locus to the traversal engine.
|
* Presents data sharded by locus to the traversal engine.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,11 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Present data sharded by read to a traversal engine.
|
* Present data sharded by read to a traversal engine.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
@ -14,6 +13,7 @@ import net.sf.samtools.SAMSequenceRecord;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.util.StringUtil;
|
import net.sf.samtools.util.StringUtil;
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 22, 2009
|
* Date: May 22, 2009
|
||||||
|
|
|
||||||
|
|
@ -2,12 +2,13 @@ package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 8, 2009
|
* Date: May 8, 2009
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||||
|
|
||||||
|
|
@ -36,6 +35,7 @@ import java.util.*;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import net.sf.samtools.SAMFileSpan;
|
import net.sf.samtools.SAMFileSpan;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A sharding strategy for loci based on reading of the index.
|
* A sharding strategy for loci based on reading of the index.
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.shards;
|
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -25,17 +25,13 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceIndex;
|
import net.sf.picard.reference.FastaSequenceIndexBuilder;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceIndexBuilder;
|
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import net.sf.picard.sam.CreateSequenceDictionary;
|
import net.sf.picard.sam.CreateSequenceDictionary;
|
||||||
import org.broadinstitute.sting.utils.file.FSLock;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import net.sf.picard.reference.FastaSequenceIndex;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.RandomAccessFile;
|
import java.io.RandomAccessFile;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.nio.channels.FileLock;
|
import java.nio.channels.FileLock;
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrde
|
||||||
import org.broadinstitute.sting.gatk.io.*;
|
import org.broadinstitute.sting.gatk.io.*;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
|
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
|
||||||
|
|
||||||
import javax.management.MBeanServer;
|
import javax.management.MBeanServer;
|
||||||
|
|
@ -24,6 +23,8 @@ import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.FutureTask;
|
import java.util.concurrent.FutureTask;
|
||||||
import java.lang.management.ManagementFactory;
|
import java.lang.management.ManagementFactory;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A microscheduler that schedules shards according to a tree-like structure.
|
* A microscheduler that schedules shards according to a tree-like structure.
|
||||||
* Requires a special walker tagged with a 'TreeReducible' interface.
|
* Requires a special walker tagged with a 'TreeReducible' interface.
|
||||||
|
|
|
||||||
|
|
@ -13,12 +13,12 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
||||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
|
|
||||||
/** A micro-scheduling manager for single-threaded execution of a traversal. */
|
/** A micro-scheduling manager for single-threaded execution of a traversal. */
|
||||||
|
|
|
||||||
|
|
@ -38,12 +38,13 @@ import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
|
||||||
|
|
@ -36,10 +36,10 @@ import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.util.StringUtil;
|
import net.sf.samtools.util.StringUtil;
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.hybridselection;
|
package org.broadinstitute.sting.playground.gatk.walkers.hybridselection;
|
||||||
|
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import net.sf.picard.util.Interval;
|
import net.sf.picard.util.Interval;
|
||||||
import net.sf.picard.util.IntervalList;
|
import net.sf.picard.util.IntervalList;
|
||||||
import net.sf.picard.util.OverlapDetector;
|
import net.sf.picard.util.OverlapDetector;
|
||||||
|
|
@ -49,7 +50,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
@ -300,7 +300,6 @@ public class HybSelPerformanceWalker extends LocusWalker<Integer, HybSelPerforma
|
||||||
IndexedFastaSequenceFile seqFile = null;
|
IndexedFastaSequenceFile seqFile = null;
|
||||||
|
|
||||||
private double calculateGC(GenomeLoc target) {
|
private double calculateGC(GenomeLoc target) {
|
||||||
try {
|
|
||||||
if (seqFile == null) {
|
if (seqFile == null) {
|
||||||
seqFile = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
|
seqFile = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
|
||||||
}
|
}
|
||||||
|
|
@ -313,9 +312,5 @@ public class HybSelPerformanceWalker extends LocusWalker<Integer, HybSelPerforma
|
||||||
}
|
}
|
||||||
return ( (double) gcCount ) / ((double) refSeq.getBases().length);
|
return ( (double) gcCount ) / ((double) refSeq.getBases().length);
|
||||||
|
|
||||||
} catch (IOException ioe) {
|
|
||||||
throw new RuntimeException(ioe);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,268 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.fasta;
|
|
||||||
|
|
||||||
import net.sf.picard.PicardException;
|
|
||||||
import net.sf.picard.io.IoUtil;
|
|
||||||
|
|
||||||
import java.util.Scanner;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.LinkedHashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.regex.MatchResult;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads a fasta index file (.fai), as generated by `samtools faidx`.
|
|
||||||
*/
|
|
||||||
public class FastaSequenceIndex implements Iterable {
|
|
||||||
/**
|
|
||||||
* Store the entries. Use a LinkedHashMap for consistent iteration in insertion order.
|
|
||||||
*/
|
|
||||||
private final Map<String,FastaSequenceIndexEntry> sequenceEntries = new LinkedHashMap<String,FastaSequenceIndexEntry>();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Build a sequence index from the specified file.
|
|
||||||
* @param indexFile File to open.
|
|
||||||
* @throws FileNotFoundException if the index file cannot be found.
|
|
||||||
*/
|
|
||||||
protected FastaSequenceIndex( File indexFile ) throws FileNotFoundException {
|
|
||||||
if(!indexFile.exists())
|
|
||||||
throw new FileNotFoundException(String.format("Fasta index file is missing: %s",indexFile.getAbsolutePath()));
|
|
||||||
|
|
||||||
IoUtil.assertFileIsReadable(indexFile);
|
|
||||||
parseIndexFile(indexFile);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Build an empty sequence index. Entries can be added later.
|
|
||||||
*/
|
|
||||||
protected FastaSequenceIndex() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parse the contents of an index file, caching the results internally.
|
|
||||||
* @param indexFile File to parse.
|
|
||||||
* @throws FileNotFoundException Thrown if file could not be opened.
|
|
||||||
*/
|
|
||||||
private void parseIndexFile(File indexFile) throws FileNotFoundException {
|
|
||||||
Scanner scanner = new Scanner(indexFile);
|
|
||||||
|
|
||||||
while( scanner.hasNext() ) {
|
|
||||||
// Tokenize and validate the index line.
|
|
||||||
String result = scanner.findInLine("(.+)\\t+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)");
|
|
||||||
if( result == null )
|
|
||||||
throw new PicardException("Found invalid line in index file:" + scanner.nextLine());
|
|
||||||
MatchResult tokens = scanner.match();
|
|
||||||
if( tokens.groupCount() != 5 )
|
|
||||||
throw new PicardException("Found invalid line in index file:" + scanner.nextLine());
|
|
||||||
|
|
||||||
// Skip past the line separator
|
|
||||||
scanner.nextLine();
|
|
||||||
|
|
||||||
// Parse the index line.
|
|
||||||
String contig = tokens.group(1);
|
|
||||||
long size = Long.valueOf(tokens.group(2));
|
|
||||||
long location = Long.valueOf(tokens.group(3));
|
|
||||||
int basesPerLine = Integer.valueOf(tokens.group(4));
|
|
||||||
int bytesPerLine = Integer.valueOf(tokens.group(5));
|
|
||||||
|
|
||||||
// Build sequence structure
|
|
||||||
sequenceEntries.put( contig,new FastaSequenceIndexEntry(contig,location,size,basesPerLine,bytesPerLine) );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Does the given contig name have a corresponding entry?
|
|
||||||
* @param contigName The contig name for which to search.
|
|
||||||
* @return True if contig name is present; false otherwise.
|
|
||||||
*/
|
|
||||||
public boolean hasIndexEntry( String contigName ) {
|
|
||||||
return sequenceEntries.containsKey(contigName);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieve the index entry associated with the given contig.
|
|
||||||
* @param contigName Name of the contig for which to search.
|
|
||||||
* @return Index entry associated with the given contig.
|
|
||||||
* @throws PicardException if the associated index entry can't be found.
|
|
||||||
*/
|
|
||||||
public FastaSequenceIndexEntry getIndexEntry( String contigName ) {
|
|
||||||
if( !hasIndexEntry(contigName) )
|
|
||||||
throw new PicardException("Unable to find entry for contig: " + contigName);
|
|
||||||
|
|
||||||
return sequenceEntries.get(contigName);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates an iterator which can iterate through all entries in a fasta index.
|
|
||||||
* @return iterator over all fasta index entries.
|
|
||||||
*/
|
|
||||||
public Iterator<FastaSequenceIndexEntry> iterator() {
|
|
||||||
return sequenceEntries.values().iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the number of elements in the index.
|
|
||||||
* @return Number of elements in the index.
|
|
||||||
*/
|
|
||||||
public int size() {
|
|
||||||
return sequenceEntries.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds entry to index. Used by Fai file generator to create index entry on the fly.
|
|
||||||
* @param contig The name of the contig
|
|
||||||
* @param location Byte-referenced location of contig in file
|
|
||||||
* @param size Number of bases in contig
|
|
||||||
* @param basesPerLine Number of bases in each line. Must be uniform.
|
|
||||||
* @param bytesPerLine Number of bytes in each line. Must be uniform.
|
|
||||||
*/
|
|
||||||
public void addIndexEntry(String contig, long location, long size, int basesPerLine, int bytesPerLine) {
|
|
||||||
sequenceEntries.put( contig,new FastaSequenceIndexEntry(contig,location,size,basesPerLine,bytesPerLine) );
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compare two FastaSequenceIndex objects. Built for use in testing. No hash function has been created.
|
|
||||||
* @param other Another FastaSequenceIndex to compare
|
|
||||||
* @return True if index has the same entries as other instance, in the same order
|
|
||||||
*/
|
|
||||||
public boolean equals(FastaSequenceIndex other) {
|
|
||||||
Iterator<FastaSequenceIndexEntry> iter = this.iterator();
|
|
||||||
Iterator<FastaSequenceIndexEntry> otherIter = other.iterator();
|
|
||||||
while (iter.hasNext()) {
|
|
||||||
if (!otherIter.hasNext())
|
|
||||||
return false;
|
|
||||||
if (!iter.next().equals(otherIter.next()))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Hold an individual entry in a fasta sequence index file.
|
|
||||||
*/
|
|
||||||
class FastaSequenceIndexEntry {
|
|
||||||
private final String contig;
|
|
||||||
private final long location;
|
|
||||||
private final long size;
|
|
||||||
private final int basesPerLine;
|
|
||||||
private final int bytesPerLine;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new entry with the given parameters.
|
|
||||||
* @param contig Contig this entry represents.
|
|
||||||
* @param location Location (byte coordinate) in the fasta file.
|
|
||||||
* @param size The number of bases in the contig.
|
|
||||||
* @param basesPerLine How many bases are on each line.
|
|
||||||
* @param bytesPerLine How many bytes are on each line (includes newline characters).
|
|
||||||
*/
|
|
||||||
public FastaSequenceIndexEntry( String contig,
|
|
||||||
long location,
|
|
||||||
long size,
|
|
||||||
int basesPerLine,
|
|
||||||
int bytesPerLine ) {
|
|
||||||
this.contig = contig;
|
|
||||||
this.location = location;
|
|
||||||
this.size = size;
|
|
||||||
this.basesPerLine = basesPerLine;
|
|
||||||
this.bytesPerLine = bytesPerLine;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the contig associated with this entry.
|
|
||||||
* @return String representation of the contig.
|
|
||||||
*/
|
|
||||||
public String getContig() {
|
|
||||||
return contig;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the location of this contig within the fasta.
|
|
||||||
* @return seek position within the fasta.
|
|
||||||
*/
|
|
||||||
public long getLocation() {
|
|
||||||
return location;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the size, in bytes, of the data in the contig.
|
|
||||||
* @return size of the contig bases in bytes.
|
|
||||||
*/
|
|
||||||
public long getSize() {
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the number of bases in a given line.
|
|
||||||
* @return Number of bases in the fasta line.
|
|
||||||
*/
|
|
||||||
public int getBasesPerLine() {
|
|
||||||
return basesPerLine;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* How many bytes (bases + whitespace) are consumed by the
|
|
||||||
* given line?
|
|
||||||
* @return Number of bytes in a line.
|
|
||||||
*/
|
|
||||||
public int getBytesPerLine() {
|
|
||||||
return bytesPerLine;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* For debugging. Emit the contents of each contig line.
|
|
||||||
* @return A string representation of the contig line.
|
|
||||||
*/
|
|
||||||
public String toString() {
|
|
||||||
return String.format("contig %s; location %d; size %d; basesPerLine %d; bytesPerLine %d", contig,
|
|
||||||
location,
|
|
||||||
size,
|
|
||||||
basesPerLine,
|
|
||||||
bytesPerLine );
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Print string in format of fai file line
|
|
||||||
* @return Contig as one line in a fai file
|
|
||||||
*/
|
|
||||||
public String toIndexFileLine() {
|
|
||||||
return String.format("%s\t%d\t%d\t%d\t%d", contig, size, location, basesPerLine, bytesPerLine);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compare entry to another instance
|
|
||||||
* @param other another FastaSequenceIndexEntry
|
|
||||||
* @return True if each has the same name, location, size, basesPerLine and bytesPerLine
|
|
||||||
*/
|
|
||||||
public boolean equals(FastaSequenceIndexEntry other) {
|
|
||||||
return (contig.equals(other.contig) && size == other.size && location == other.location
|
|
||||||
&& basesPerLine == other.basesPerLine && bytesPerLine == other.bytesPerLine);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,285 +0,0 @@
|
||||||
package org.broadinstitute.sting.utils.fasta;
|
|
||||||
|
|
||||||
import net.sf.picard.PicardException;
|
|
||||||
import net.sf.picard.io.IoUtil;
|
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
|
||||||
import net.sf.samtools.SAMTextHeaderCodec;
|
|
||||||
import net.sf.samtools.util.AsciiLineReader;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.channels.FileChannel;
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A fasta file driven by an index for fast, concurrent lookups. Supports two interfaces:
|
|
||||||
* the ReferenceSequenceFile for old-style, stateful lookups and a direct getter.
|
|
||||||
*/
|
|
||||||
public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
|
|
||||||
/**
|
|
||||||
* Size of the read buffer.
|
|
||||||
*/
|
|
||||||
private static final int BUFFER_SIZE = 128 * 1024;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Stores the main fasta file.
|
|
||||||
*/
|
|
||||||
private final File file;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The interface facilitating direct access to the fasta.
|
|
||||||
*/
|
|
||||||
private FileChannel channel;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A representation of the sequence dictionary, stored alongside the fasta in a .dict file.
|
|
||||||
*/
|
|
||||||
private SAMSequenceDictionary sequenceDictionary = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A representation of the sequence index, stored alongside the fasta in a .fasta.fai file.
|
|
||||||
*/
|
|
||||||
private FastaSequenceIndex index;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An iterator into the fasta index, for traversing iteratively across the fasta.
|
|
||||||
*/
|
|
||||||
private Iterator<FastaSequenceIndexEntry> indexIterator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
|
|
||||||
* @param file The file to open.
|
|
||||||
* @throws FileNotFoundException If the fasta or any of its supporting files cannot be found.
|
|
||||||
*/
|
|
||||||
public IndexedFastaSequenceFile(File file) throws FileNotFoundException {
|
|
||||||
this.file = file;
|
|
||||||
FileInputStream in = new FileInputStream(file);
|
|
||||||
channel = in.getChannel();
|
|
||||||
|
|
||||||
loadDictionary(file);
|
|
||||||
loadIndex(file);
|
|
||||||
sanityCheckDictionaryAgainstIndex();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
|
|
||||||
* @param file The file to open.
|
|
||||||
* @param sequenceIndex FastaSequenceIndex that was previously created
|
|
||||||
* @throws FileNotFoundException If the fasta or any of its supporting files cannot be found.
|
|
||||||
*/
|
|
||||||
public IndexedFastaSequenceFile(File file, FastaSequenceIndex sequenceIndex) throws FileNotFoundException {
|
|
||||||
this.file = file;
|
|
||||||
FileInputStream in = new FileInputStream(file);
|
|
||||||
channel = in.getChannel();
|
|
||||||
|
|
||||||
loadDictionary(file);
|
|
||||||
// Temporary change: sequenceIndex is passed in directly. See comments in ReferenceDataSource.
|
|
||||||
index = sequenceIndex;
|
|
||||||
sanityCheckDictionaryAgainstIndex();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Always returns true for this implementation.
|
|
||||||
* @return True.
|
|
||||||
*/
|
|
||||||
public boolean isIndexed() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Loads a dictionary, if available.
|
|
||||||
* @param fastaFile File to check for a match.
|
|
||||||
*/
|
|
||||||
private void loadDictionary( File fastaFile ) {
|
|
||||||
// Try and locate the dictionary
|
|
||||||
String dictionaryName = fastaFile.getAbsolutePath();
|
|
||||||
dictionaryName = dictionaryName.substring(0, getFastaFileExtensionStart(dictionaryName));
|
|
||||||
dictionaryName += ".dict";
|
|
||||||
final File dictionary = new File(dictionaryName);
|
|
||||||
if (!dictionary.exists())
|
|
||||||
throw new PicardException("Unable to load .dict file. Dictionary is required for the indexed fasta reader.");
|
|
||||||
|
|
||||||
IoUtil.assertFileIsReadable(dictionary);
|
|
||||||
|
|
||||||
try {
|
|
||||||
final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
|
|
||||||
final SAMFileHeader header = codec.decode(new AsciiLineReader(new FileInputStream(dictionary)),
|
|
||||||
dictionary.toString());
|
|
||||||
if (header.getSequenceDictionary() != null && header.getSequenceDictionary().size() > 0) {
|
|
||||||
this.sequenceDictionary = header.getSequenceDictionary();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (Exception e) {
|
|
||||||
throw new PicardException("Could not open sequence dictionary file: " + dictionaryName, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the index of the first character in the fasta file's extension.
|
|
||||||
* @param filename The filename of the fasta. Must not be null, and must end with either '.fasta' or '.fa'.
|
|
||||||
* @return The index of the start of the extension within the filename. If neither '.fasta' nor '.fa' are
|
|
||||||
* present in the filename, a StingException will be thrown.
|
|
||||||
*/
|
|
||||||
private int getFastaFileExtensionStart( String filename ) {
|
|
||||||
if( filename.endsWith(".fasta") )
|
|
||||||
return filename.lastIndexOf(".fasta");
|
|
||||||
else if( filename.endsWith(".fa") )
|
|
||||||
return filename.lastIndexOf(".fa");
|
|
||||||
else
|
|
||||||
throw new StingException("Invalid fasta filename; fasta filename must end with '.fasta' or '.fa'.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Loads the index for the fasta, if present. Throws an exception if now present.
|
|
||||||
* @param fastaFile FASTA file to load.
|
|
||||||
* @throws FileNotFoundException if FASTA file cannot be found.
|
|
||||||
*/
|
|
||||||
private void loadIndex( File fastaFile ) throws FileNotFoundException {
|
|
||||||
File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
|
|
||||||
if (!indexFile.exists())
|
|
||||||
throw new PicardException(String.format("Unable to load fasta index file %s. "+
|
|
||||||
"Please create it using 'samtools faidx'.",indexFile.getAbsolutePath()));
|
|
||||||
index = new FastaSequenceIndex(indexFile);
|
|
||||||
reset();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Do some basic checking to make sure the dictionary and the index match.
|
|
||||||
*/
|
|
||||||
private void sanityCheckDictionaryAgainstIndex() {
|
|
||||||
// Make sure dictionary and index are the same size.
|
|
||||||
if( sequenceDictionary.getSequences().size() != index.size() )
|
|
||||||
throw new PicardException("Sequence dictionary and index contain different numbers of contigs");
|
|
||||||
|
|
||||||
for( SAMSequenceRecord sequenceRecord: sequenceDictionary.getSequences() ) {
|
|
||||||
// Make sure sequence name is present in the index.
|
|
||||||
String sequenceName = sequenceRecord.getSequenceName();
|
|
||||||
if( !index.hasIndexEntry(sequenceName) )
|
|
||||||
throw new PicardException("Index does not contain dictionary entry: " + sequenceName );
|
|
||||||
|
|
||||||
// Make sure sequence length matches index length.
|
|
||||||
if( sequenceRecord.getSequenceLength() != index.getIndexEntry(sequenceName).getSize())
|
|
||||||
throw new PicardException("Index length does not match dictionary length for contig: " + sequenceName );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieves the sequence dictionary for the fasta file.
|
|
||||||
* @return sequence dictionary of the fasta.
|
|
||||||
*/
|
|
||||||
public SAMSequenceDictionary getSequenceDictionary() {
|
|
||||||
return sequenceDictionary;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieves the complete sequence described by this contig.
|
|
||||||
* @param contig contig whose data should be returned.
|
|
||||||
* @return The full sequence associated with this contig.
|
|
||||||
*/
|
|
||||||
public ReferenceSequence getSequence( String contig ) {
|
|
||||||
return getSubsequenceAt( contig, 1, (int)index.getIndexEntry(contig).getSize() );
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the subsequence of the contig in the range [start,stop]
|
|
||||||
* @param contig Contig whose subsequence to retrieve.
|
|
||||||
* @param start inclusive, 1-based start of region.
|
|
||||||
* @param stop inclusive, 1-based stop of region.
|
|
||||||
* @return The partial reference sequence associated with this range.
|
|
||||||
*/
|
|
||||||
public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) {
|
|
||||||
if(start > stop)
|
|
||||||
throw new PicardException(String.format("Malformed query; start point %d lies after end point %d",start,stop));
|
|
||||||
|
|
||||||
FastaSequenceIndexEntry indexEntry = index.getIndexEntry(contig);
|
|
||||||
|
|
||||||
if(stop > indexEntry.getSize())
|
|
||||||
throw new PicardException("Query asks for data past end of contig");
|
|
||||||
|
|
||||||
int length = (int)(stop - start + 1);
|
|
||||||
|
|
||||||
byte[] target = new byte[length];
|
|
||||||
ByteBuffer targetBuffer = ByteBuffer.wrap(target);
|
|
||||||
|
|
||||||
final int basesPerLine = indexEntry.getBasesPerLine();
|
|
||||||
final int bytesPerLine = indexEntry.getBytesPerLine();
|
|
||||||
final int terminatorLength = bytesPerLine - basesPerLine;
|
|
||||||
|
|
||||||
long startOffset = ((start-1)/basesPerLine)*bytesPerLine + (start-1)%basesPerLine;
|
|
||||||
|
|
||||||
// Allocate a 128K buffer for reading in sequence data.
|
|
||||||
ByteBuffer channelBuffer = ByteBuffer.allocate(BUFFER_SIZE);
|
|
||||||
|
|
||||||
while(targetBuffer.position() < length) {
|
|
||||||
// If the bufferOffset is currently within the eol characters in the string, push the bufferOffset forward to the next printable character.
|
|
||||||
startOffset += Math.max((int)(startOffset%bytesPerLine - basesPerLine + 1),0);
|
|
||||||
|
|
||||||
try {
|
|
||||||
startOffset += channel.read(channelBuffer,indexEntry.getLocation()+startOffset);
|
|
||||||
}
|
|
||||||
catch(IOException ex) {
|
|
||||||
throw new PicardException("Unable to map FASTA file into memory.");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reset the buffer for outbound transfers.
|
|
||||||
channelBuffer.flip();
|
|
||||||
|
|
||||||
// Calculate the size of the next run of bases based on the contents we've already retrieved.
|
|
||||||
final int positionInContig = (int)start-1+targetBuffer.position();
|
|
||||||
final int nextBaseSpan = Math.min(basesPerLine-positionInContig%basesPerLine,length-targetBuffer.position());
|
|
||||||
// Cap the bytes to transfer by limiting the nextBaseSpan to the size of the channel buffer.
|
|
||||||
int bytesToTransfer = Math.min(nextBaseSpan,channelBuffer.capacity());
|
|
||||||
|
|
||||||
channelBuffer.limit(channelBuffer.position()+bytesToTransfer);
|
|
||||||
|
|
||||||
while(channelBuffer.hasRemaining()) {
|
|
||||||
targetBuffer.put(channelBuffer);
|
|
||||||
|
|
||||||
bytesToTransfer = Math.min(basesPerLine,length-targetBuffer.position());
|
|
||||||
channelBuffer.limit(Math.min(channelBuffer.position()+bytesToTransfer+terminatorLength,channelBuffer.capacity()));
|
|
||||||
channelBuffer.position(Math.min(channelBuffer.position()+terminatorLength,channelBuffer.capacity()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reset the buffer for inbound transfers.
|
|
||||||
channelBuffer.flip();
|
|
||||||
}
|
|
||||||
|
|
||||||
return new ReferenceSequence( contig, sequenceDictionary.getSequenceIndex(contig), target );
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the next sequence if available, or null if not present.
|
|
||||||
* @return next sequence if available, or null if not present.
|
|
||||||
*/
|
|
||||||
public ReferenceSequence nextSequence() {
|
|
||||||
if( !indexIterator.hasNext() )
|
|
||||||
return null;
|
|
||||||
return getSequence( indexIterator.next().getContig() );
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reset the iterator over the index.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void reset() {
|
|
||||||
indexIterator = index.iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A simple toString implementation for debugging.
|
|
||||||
* @return String representation of the file.
|
|
||||||
*/
|
|
||||||
public String toString() {
|
|
||||||
return this.file.getAbsolutePath();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -23,9 +23,8 @@
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.fasta;
|
package net.sf.picard.reference;
|
||||||
|
|
||||||
import net.sf.picard.PicardException;
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSourceProgressListener;
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSourceProgressListener;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
|
@ -61,7 +60,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
|
||||||
fastaFile = new File(validationDataLocation + "exampleFASTA.fasta");
|
fastaFile = new File(validationDataLocation + "exampleFASTA.fasta");
|
||||||
builder = new FastaSequenceIndexBuilder(fastaFile, progress);
|
builder = new FastaSequenceIndexBuilder(fastaFile, progress);
|
||||||
FastaSequenceIndex index = builder.createIndex();
|
FastaSequenceIndex index = builder.createIndex();
|
||||||
controlIndex.addIndexEntry("chr1", 6, 100000, 60, 61);
|
controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 100000, 60, 61,0));
|
||||||
|
|
||||||
Assert.assertTrue(index.equals(controlIndex));
|
Assert.assertTrue(index.equals(controlIndex));
|
||||||
}
|
}
|
||||||
|
|
@ -78,7 +77,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
|
||||||
fastaFile = new File(validationDataLocation + "exampleFASTA-windows.fasta");
|
fastaFile = new File(validationDataLocation + "exampleFASTA-windows.fasta");
|
||||||
builder = new FastaSequenceIndexBuilder(fastaFile, progress);
|
builder = new FastaSequenceIndexBuilder(fastaFile, progress);
|
||||||
FastaSequenceIndex index = builder.createIndex();
|
FastaSequenceIndex index = builder.createIndex();
|
||||||
controlIndex.addIndexEntry("chr2", 7, 29, 7, 9);
|
controlIndex.add(new FastaSequenceIndexEntry("chr2", 7, 29, 7, 9,0));
|
||||||
|
|
||||||
Assert.assertTrue(index.equals(controlIndex));
|
Assert.assertTrue(index.equals(controlIndex));
|
||||||
}
|
}
|
||||||
|
|
@ -94,8 +93,8 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
|
||||||
fastaFile = new File(validationDataLocation + "exampleFASTA-combined.fasta");
|
fastaFile = new File(validationDataLocation + "exampleFASTA-combined.fasta");
|
||||||
builder = new FastaSequenceIndexBuilder(fastaFile, progress);
|
builder = new FastaSequenceIndexBuilder(fastaFile, progress);
|
||||||
FastaSequenceIndex index = builder.createIndex();
|
FastaSequenceIndex index = builder.createIndex();
|
||||||
controlIndex.addIndexEntry("chr1", 6, 100000, 60, 61);
|
controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 100000, 60, 61,0));
|
||||||
controlIndex.addIndexEntry("chr2", 101680, 29, 7, 9);
|
controlIndex.add(new FastaSequenceIndexEntry("chr2", 101680, 29, 7, 9,1));
|
||||||
|
|
||||||
Assert.assertTrue(index.equals(controlIndex));
|
Assert.assertTrue(index.equals(controlIndex));
|
||||||
}
|
}
|
||||||
|
|
@ -111,9 +110,9 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
|
||||||
fastaFile = new File(validationDataLocation + "exampleFASTA-3contigs.fasta");
|
fastaFile = new File(validationDataLocation + "exampleFASTA-3contigs.fasta");
|
||||||
builder = new FastaSequenceIndexBuilder(fastaFile, progress);
|
builder = new FastaSequenceIndexBuilder(fastaFile, progress);
|
||||||
FastaSequenceIndex index = builder.createIndex();
|
FastaSequenceIndex index = builder.createIndex();
|
||||||
controlIndex.addIndexEntry("chr1", 6, 17, 5, 6);
|
controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 17, 5, 6,0));
|
||||||
controlIndex.addIndexEntry("chr2", 35, 21, 7, 8);
|
controlIndex.add(new FastaSequenceIndexEntry("chr2", 35, 21, 7, 8,1));
|
||||||
controlIndex.addIndexEntry("chr3", 66, 100, 10, 11);
|
controlIndex.add(new FastaSequenceIndexEntry("chr3", 66, 100, 10, 11,2));
|
||||||
|
|
||||||
Assert.assertTrue(index.equals(controlIndex));
|
Assert.assertTrue(index.equals(controlIndex));
|
||||||
}
|
}
|
||||||
|
|
@ -7,7 +7,6 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
@ -19,6 +18,7 @@ import java.io.FileNotFoundException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Basic unit test for RecalData
|
* Basic unit test for RecalData
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.TabularROD;
|
import org.broadinstitute.sting.gatk.refdata.TabularROD;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RODRMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RODRMDTrack;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
@ -19,6 +18,8 @@ import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 27, 2009
|
* Date: May 27, 2009
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,10 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RODRMDTrack;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
|
@ -18,6 +17,7 @@ import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 21, 2009
|
* Date: May 21, 2009
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
import static junit.framework.Assert.fail;
|
import static junit.framework.Assert.fail;
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
|
|
@ -11,7 +12,6 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,10 @@ import static junit.framework.Assert.fail;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
|
|
||||||
|
|
@ -5,12 +5,12 @@ package org.broadinstitute.sting.gatk.refdata;
|
||||||
// the imports for unit testing.
|
// the imports for unit testing.
|
||||||
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
|
||||||
|
|
@ -4,13 +4,13 @@ import edu.mit.broad.picard.genotype.geli.GeliFileReader;
|
||||||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broad.tribble.gelitext.GeliTextCodec;
|
import org.broad.tribble.gelitext.GeliTextCodec;
|
||||||
import org.broad.tribble.gelitext.GeliTextFeature;
|
import org.broad.tribble.gelitext.GeliTextFeature;
|
||||||
import org.broad.tribble.util.AsciiLineReader;
|
import org.broad.tribble.util.AsciiLineReader;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||||
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
|
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
|
||||||
|
|
@ -43,11 +43,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void beforeClass() {
|
public static void beforeClass() {
|
||||||
try {
|
|
||||||
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "/reference/human_b36_both.fasta")); // TODO: make human reference use BaseTest
|
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "/reference/human_b36_both.fasta")); // TODO: make human reference use BaseTest
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
Assert.fail("Unable to load reference " + oneKGLocation + "/reference/human_b36_both.fasta");
|
|
||||||
}
|
|
||||||
GenomeLocParser.setupRefContigOrdering(seq);
|
GenomeLocParser.setupRefContigOrdering(seq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
|
@ -25,6 +24,8 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* test out pieces of the VCF 4 codec.
|
* test out pieces of the VCF 4 codec.
|
||||||
*/
|
*/
|
||||||
|
|
@ -36,11 +37,7 @@ public class VCF4UnitTest extends BaseTest {
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void setupContig() {
|
public static void setupContig() {
|
||||||
IndexedFastaSequenceFile seq;
|
IndexedFastaSequenceFile seq;
|
||||||
try {
|
|
||||||
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new StingException("unable to load the sequence dictionary");
|
|
||||||
}
|
|
||||||
GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary());
|
GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,11 +24,11 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
@ -100,11 +100,7 @@ public class RMDTrackManagerUnitTest extends BaseTest {
|
||||||
// @Test used only to determine how fast queries are, don't uncomment! (unless you know what you're doing).
|
// @Test used only to determine how fast queries are, don't uncomment! (unless you know what you're doing).
|
||||||
public void testSpeedOfRealQuery() {
|
public void testSpeedOfRealQuery() {
|
||||||
IndexedFastaSequenceFile file = null;
|
IndexedFastaSequenceFile file = null;
|
||||||
try {
|
|
||||||
file = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta"));
|
file = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta"));
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
Assert.assertTrue(false);
|
|
||||||
}
|
|
||||||
final int intervalSize = 10000000;
|
final int intervalSize = 10000000;
|
||||||
GenomeLocParser.setupRefContigOrdering(file.getSequenceDictionary());
|
GenomeLocParser.setupRefContigOrdering(file.getSequenceDictionary());
|
||||||
RMDTrackManager manager = new RMDTrackManager();
|
RMDTrackManager manager = new RMDTrackManager();
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.traversals;
|
package org.broadinstitute.sting.gatk.traversals;
|
||||||
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||||
|
|
@ -12,7 +13,6 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource
|
||||||
import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
|
import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
@ -106,12 +106,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testUnmappedReadCount() {
|
public void testUnmappedReadCount() {
|
||||||
IndexedFastaSequenceFile ref = null;
|
IndexedFastaSequenceFile ref = null;
|
||||||
try {
|
|
||||||
ref = new IndexedFastaSequenceFile(refFile);
|
ref = new IndexedFastaSequenceFile(refFile);
|
||||||
}
|
|
||||||
catch (FileNotFoundException ex) {
|
|
||||||
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
|
|
||||||
}
|
|
||||||
GenomeLocParser.setupRefContigOrdering(ref);
|
GenomeLocParser.setupRefContigOrdering(ref);
|
||||||
|
|
||||||
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
|
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
|
||||||
|
|
|
||||||
|
|
@ -8,12 +8,12 @@ import org.junit.Assert;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Basic unit test for GenomeLoc
|
* Basic unit test for GenomeLoc
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.bed;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
@ -14,6 +13,8 @@ import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
|
|
||||||
public class BedParserUnitTest extends BaseTest {
|
public class BedParserUnitTest extends BaseTest {
|
||||||
|
|
||||||
|
|
@ -22,11 +23,7 @@ public class BedParserUnitTest extends BaseTest {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void beforeTests() {
|
public static void beforeTests() {
|
||||||
try {
|
|
||||||
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new StingException("unable to load the sequence dictionary");
|
|
||||||
}
|
|
||||||
GenomeLocParser.setupRefContigOrdering(seq);
|
GenomeLocParser.setupRefContigOrdering(seq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,250 +0,0 @@
|
||||||
package org.broadinstitute.sting.utils.fasta;
|
|
||||||
|
|
||||||
import net.sf.picard.PicardException;
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
|
||||||
import org.junit.Assert;
|
|
||||||
import org.junit.Before;
|
|
||||||
import org.junit.BeforeClass;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test the fasta sequence index reader.
|
|
||||||
*/
|
|
||||||
public class FastaSequenceIndexUnitTest extends BaseTest {
|
|
||||||
// our basic human 18 fai
|
|
||||||
private static String sequenceIndexName = null;
|
|
||||||
private FastaSequenceIndex sequenceIndex = null;
|
|
||||||
|
|
||||||
// a custom index that tests the colon, and semi-colon, and other random characters
|
|
||||||
private static String sequenceIndexColonSemiColonTestName = null;
|
|
||||||
private FastaSequenceIndex sequenceIndexColonSemiColonTest = null;
|
|
||||||
|
|
||||||
|
|
||||||
@BeforeClass
|
|
||||||
public static void initialize() {
|
|
||||||
sequenceIndexName = seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta.fai";
|
|
||||||
sequenceIndexColonSemiColonTestName = validationDataLocation + "testing.fai";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Before
|
|
||||||
public void doForEachTest() throws FileNotFoundException {
|
|
||||||
sequenceIndex = new FastaSequenceIndex( new File(sequenceIndexName) );
|
|
||||||
sequenceIndexColonSemiColonTest = new FastaSequenceIndex( new File(sequenceIndexColonSemiColonTestName) );
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testInitialContig() {
|
|
||||||
logger.warn("Executing testInitialContig");
|
|
||||||
|
|
||||||
Assert.assertTrue("Contig chrM is not present", sequenceIndex.hasIndexEntry("chrM"));
|
|
||||||
FastaSequenceIndexEntry entry = sequenceIndex.getIndexEntry("chrM");
|
|
||||||
Assert.assertEquals("Contig chrM name is incorrect",entry.getContig(),"chrM");
|
|
||||||
Assert.assertEquals("Contig chrM location is incorrect",entry.getLocation(),6L);
|
|
||||||
Assert.assertEquals("Contig chrM size is incorrect",entry.getSize(),16571L);
|
|
||||||
Assert.assertEquals("Contig chrM bases per line is incorrect",entry.getBasesPerLine(),50);
|
|
||||||
Assert.assertEquals("Contig chrM bytes per line is incorrect",entry.getBytesPerLine(),51);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testMiddleContig() {
|
|
||||||
logger.warn("Executing testMiddleContig");
|
|
||||||
|
|
||||||
Assert.assertTrue("Contig chr8 is not present", sequenceIndex.hasIndexEntry("chr8"));
|
|
||||||
FastaSequenceIndexEntry entry = sequenceIndex.getIndexEntry("chr8");
|
|
||||||
Assert.assertEquals("Contig chr8 name is incorrect",entry.getContig(),"chr8");
|
|
||||||
Assert.assertEquals("Contig chr8 location is incorrect",entry.getLocation(),1419403101L);
|
|
||||||
Assert.assertEquals("Contig chr8 size is incorrect",entry.getSize(),146274826L);
|
|
||||||
Assert.assertEquals("Contig chr8 bases per line is incorrect",entry.getBasesPerLine(),50);
|
|
||||||
Assert.assertEquals("Contig chr8 bytes per line is incorrect",entry.getBytesPerLine(),51);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testLastContig() {
|
|
||||||
logger.warn("Executing testLastContig");
|
|
||||||
|
|
||||||
Assert.assertTrue("Contig chrX_random is not present", sequenceIndex.hasIndexEntry("chrX_random"));
|
|
||||||
FastaSequenceIndexEntry entry = sequenceIndex.getIndexEntry("chrX_random");
|
|
||||||
Assert.assertEquals("Contig chrX_random name is incorrect",entry.getContig(),"chrX_random");
|
|
||||||
Assert.assertEquals("Contig chrX_random location is incorrect",entry.getLocation(),3156698441L);
|
|
||||||
Assert.assertEquals("Contig chrX_random size is incorrect",entry.getSize(),1719168L);
|
|
||||||
Assert.assertEquals("Contig chrX_random bases per line is incorrect",entry.getBasesPerLine(),50);
|
|
||||||
Assert.assertEquals("Contig chrX_random bytes per line is incorrect",entry.getBytesPerLine(),51);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testAllContigsPresent() {
|
|
||||||
logger.warn("Executing testAllContigsPresent");
|
|
||||||
|
|
||||||
Assert.assertTrue("Contig chrM is not present", sequenceIndex.hasIndexEntry("chrM"));
|
|
||||||
Assert.assertTrue("Contig chr1 is not present", sequenceIndex.hasIndexEntry("chr1"));
|
|
||||||
Assert.assertTrue("Contig chr2 is not present", sequenceIndex.hasIndexEntry("chr2"));
|
|
||||||
Assert.assertTrue("Contig chr3 is not present", sequenceIndex.hasIndexEntry("chr3"));
|
|
||||||
Assert.assertTrue("Contig chr4 is not present", sequenceIndex.hasIndexEntry("chr4"));
|
|
||||||
Assert.assertTrue("Contig chr5 is not present", sequenceIndex.hasIndexEntry("chr5"));
|
|
||||||
Assert.assertTrue("Contig chr6 is not present", sequenceIndex.hasIndexEntry("chr6"));
|
|
||||||
Assert.assertTrue("Contig chr7 is not present", sequenceIndex.hasIndexEntry("chr7"));
|
|
||||||
Assert.assertTrue("Contig chr8 is not present", sequenceIndex.hasIndexEntry("chr8"));
|
|
||||||
Assert.assertTrue("Contig chr9 is not present", sequenceIndex.hasIndexEntry("chr9"));
|
|
||||||
Assert.assertTrue("Contig chr10 is not present", sequenceIndex.hasIndexEntry("chr10"));
|
|
||||||
Assert.assertTrue("Contig chr11 is not present", sequenceIndex.hasIndexEntry("chr11"));
|
|
||||||
Assert.assertTrue("Contig chr12 is not present", sequenceIndex.hasIndexEntry("chr12"));
|
|
||||||
Assert.assertTrue("Contig chr13 is not present", sequenceIndex.hasIndexEntry("chr13"));
|
|
||||||
Assert.assertTrue("Contig chr14 is not present", sequenceIndex.hasIndexEntry("chr14"));
|
|
||||||
Assert.assertTrue("Contig chr15 is not present", sequenceIndex.hasIndexEntry("chr15"));
|
|
||||||
Assert.assertTrue("Contig chr16 is not present", sequenceIndex.hasIndexEntry("chr16"));
|
|
||||||
Assert.assertTrue("Contig chr17 is not present", sequenceIndex.hasIndexEntry("chr17"));
|
|
||||||
Assert.assertTrue("Contig chr18 is not present", sequenceIndex.hasIndexEntry("chr18"));
|
|
||||||
Assert.assertTrue("Contig chr19 is not present", sequenceIndex.hasIndexEntry("chr19"));
|
|
||||||
Assert.assertTrue("Contig chr20 is not present", sequenceIndex.hasIndexEntry("chr20"));
|
|
||||||
Assert.assertTrue("Contig chr21 is not present", sequenceIndex.hasIndexEntry("chr21"));
|
|
||||||
Assert.assertTrue("Contig chr22 is not present", sequenceIndex.hasIndexEntry("chr22"));
|
|
||||||
Assert.assertTrue("Contig chrX is not present", sequenceIndex.hasIndexEntry("chrX"));
|
|
||||||
Assert.assertTrue("Contig chrY is not present", sequenceIndex.hasIndexEntry("chrY"));
|
|
||||||
Assert.assertTrue("Contig chr1_random is not present", sequenceIndex.hasIndexEntry("chr1_random"));
|
|
||||||
Assert.assertTrue("Contig chr2_random is not present", sequenceIndex.hasIndexEntry("chr2_random"));
|
|
||||||
Assert.assertTrue("Contig chr3_random is not present", sequenceIndex.hasIndexEntry("chr3_random"));
|
|
||||||
Assert.assertTrue("Contig chr4_random is not present", sequenceIndex.hasIndexEntry("chr4_random"));
|
|
||||||
Assert.assertTrue("Contig chr5_random is not present", sequenceIndex.hasIndexEntry("chr5_random"));
|
|
||||||
Assert.assertTrue("Contig chr6_random is not present", sequenceIndex.hasIndexEntry("chr6_random"));
|
|
||||||
Assert.assertTrue("Contig chr7_random is not present", sequenceIndex.hasIndexEntry("chr7_random"));
|
|
||||||
Assert.assertTrue("Contig chr8_random is not present", sequenceIndex.hasIndexEntry("chr8_random"));
|
|
||||||
Assert.assertTrue("Contig chr9_random is not present", sequenceIndex.hasIndexEntry("chr9_random"));
|
|
||||||
Assert.assertTrue("Contig chr10_random is not present", sequenceIndex.hasIndexEntry("chr10_random"));
|
|
||||||
Assert.assertTrue("Contig chr11_random is not present", sequenceIndex.hasIndexEntry("chr11_random"));
|
|
||||||
Assert.assertTrue("Contig chr13_random is not present", sequenceIndex.hasIndexEntry("chr13_random"));
|
|
||||||
Assert.assertTrue("Contig chr15_random is not present", sequenceIndex.hasIndexEntry("chr15_random"));
|
|
||||||
Assert.assertTrue("Contig chr16_random is not present", sequenceIndex.hasIndexEntry("chr16_random"));
|
|
||||||
Assert.assertTrue("Contig chr17_random is not present", sequenceIndex.hasIndexEntry("chr17_random"));
|
|
||||||
Assert.assertTrue("Contig chr18_random is not present", sequenceIndex.hasIndexEntry("chr18_random"));
|
|
||||||
Assert.assertTrue("Contig chr19_random is not present", sequenceIndex.hasIndexEntry("chr19_random"));
|
|
||||||
Assert.assertTrue("Contig chr21_random is not present", sequenceIndex.hasIndexEntry("chr21_random"));
|
|
||||||
Assert.assertTrue("Contig chr22_random is not present", sequenceIndex.hasIndexEntry("chr22_random"));
|
|
||||||
Assert.assertTrue("Contig chrX_random is not present", sequenceIndex.hasIndexEntry("chrX_random"));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testHasInvalidEntry() {
|
|
||||||
logger.warn("Executing testHasInvalidEntry");
|
|
||||||
|
|
||||||
Assert.assertFalse("Found an invalid entry", sequenceIndex.hasIndexEntry("invalid"));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test(expected= PicardException.class)
|
|
||||||
public void testGetInvalidEntry() {
|
|
||||||
logger.warn("Executing testGetInvalidEntry");
|
|
||||||
|
|
||||||
sequenceIndex.getIndexEntry("invalid");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testIteration() {
|
|
||||||
logger.warn("Executing testIteration");
|
|
||||||
|
|
||||||
Iterator<FastaSequenceIndexEntry> sequenceIndexEntries = sequenceIndex.iterator();
|
|
||||||
|
|
||||||
Assert.assertEquals("Contig chrM is not present", "chrM", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr1 is not present", "chr1", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr2 is not present", "chr2", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr3 is not present", "chr3", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr4 is not present", "chr4", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr5 is not present", "chr5", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr6 is not present", "chr6", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr7 is not present", "chr7", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr8 is not present", "chr8", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr9 is not present", "chr9", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr10 is not present", "chr10", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr11 is not present", "chr11", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr12 is not present", "chr12", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr13 is not present", "chr13", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr14 is not present", "chr14", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr15 is not present", "chr15", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr16 is not present", "chr16", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr17 is not present", "chr17", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr18 is not present", "chr18", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr19 is not present", "chr19", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr20 is not present", "chr20", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr21 is not present", "chr21", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr22 is not present", "chr22", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chrX is not present", "chrX", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chrY is not present", "chrY", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr1_random is not present", "chr1_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr2_random is not present", "chr2_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr3_random is not present", "chr3_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr4_random is not present", "chr4_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr5_random is not present", "chr5_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr6_random is not present", "chr6_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr7_random is not present", "chr7_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr8_random is not present", "chr8_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr9_random is not present", "chr9_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr10_random is not present", "chr10_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr11_random is not present", "chr11_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr13_random is not present", "chr13_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr15_random is not present", "chr15_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr16_random is not present", "chr16_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr17_random is not present", "chr17_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr18_random is not present", "chr18_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr19_random is not present", "chr19_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr21_random is not present", "chr21_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chr22_random is not present", "chr22_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertEquals("Contig chrX_random is not present", "chrX_random", sequenceIndexEntries.next().getContig());
|
|
||||||
Assert.assertFalse("Iterator still has more entries", sequenceIndexEntries.hasNext());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testSpecialCharacters() {
|
|
||||||
/* file contents:
|
|
||||||
chrM 16571 6 50 51
|
|
||||||
chr1;boat 247249719 16915 50 51
|
|
||||||
chr2:money 242951149 252211635 50 51
|
|
||||||
chr3::; 199501827 500021813 50 51
|
|
||||||
;;;;;; 1234 1234 1234 1234
|
|
||||||
file:gi|17981852|ref|NC_001807.4| 16571 2911876801 70 71
|
|
||||||
*/
|
|
||||||
Iterator<FastaSequenceIndexEntry> sequenceIndexEntries = sequenceIndexColonSemiColonTest.iterator();
|
|
||||||
FastaSequenceIndexEntry ent = sequenceIndexEntries.next();
|
|
||||||
Assert.assertEquals("Contig chrM is not present","chrM",ent.getContig());
|
|
||||||
Assert.assertEquals("Contig chrM size is not correct",16571,ent.getSize());
|
|
||||||
Assert.assertEquals("Contig chrM location is not correct",6,ent.getLocation());
|
|
||||||
Assert.assertEquals("Contig chrM bases per line is not correct",50,ent.getBasesPerLine());
|
|
||||||
Assert.assertEquals("Contig chrM bytes per line is not correct",51,ent.getBytesPerLine());
|
|
||||||
|
|
||||||
ent = sequenceIndexEntries.next();
|
|
||||||
Assert.assertEquals("Contig chr1;boat is not present","chr1;boat",ent.getContig());
|
|
||||||
Assert.assertEquals("Contig chr1;boat size is not correct",247249719,ent.getSize());
|
|
||||||
Assert.assertEquals("Contig chr1;boat location is not correct",16915,ent.getLocation());
|
|
||||||
Assert.assertEquals("Contig chr1;boat bases per line is not correct",50,ent.getBasesPerLine());
|
|
||||||
Assert.assertEquals("Contig chr1;boat bytes per line is not correct",51,ent.getBytesPerLine());
|
|
||||||
|
|
||||||
ent = sequenceIndexEntries.next();
|
|
||||||
Assert.assertEquals("Contig chr2:money is not present","chr2:money",ent.getContig());
|
|
||||||
Assert.assertEquals("Contig chr2:money size is not correct",242951149,ent.getSize());
|
|
||||||
Assert.assertEquals("Contig chr2:money location is not correct",252211635,ent.getLocation());
|
|
||||||
Assert.assertEquals("Contig chr2:money bases per line is not correct",50,ent.getBasesPerLine());
|
|
||||||
Assert.assertEquals("Contig chr2:money bytes per line is not correct",51,ent.getBytesPerLine());
|
|
||||||
|
|
||||||
ent = sequenceIndexEntries.next();
|
|
||||||
Assert.assertEquals("Contig chr3::; is not present","chr3::;",ent.getContig());
|
|
||||||
Assert.assertEquals("Contig chr3::; size is not correct",199501827,ent.getSize());
|
|
||||||
Assert.assertEquals("Contig chrM location is not correct",500021813,ent.getLocation());
|
|
||||||
Assert.assertEquals("Contig chr3::; bases per line is not correct",50,ent.getBasesPerLine());
|
|
||||||
Assert.assertEquals("Contig chr3::; bytes per line is not correct",51,ent.getBytesPerLine());
|
|
||||||
|
|
||||||
ent = sequenceIndexEntries.next();
|
|
||||||
Assert.assertEquals("Contig ;;;;;;;; is not present",";;;;;;;;",ent.getContig());
|
|
||||||
Assert.assertEquals("Contig ;;;;;;;; size is not correct",123,ent.getSize());
|
|
||||||
Assert.assertEquals("Contig ;;;;;;;; location is not correct",234,ent.getLocation());
|
|
||||||
Assert.assertEquals("Contig ;;;;;;;; bases per line is not correct",456,ent.getBasesPerLine());
|
|
||||||
Assert.assertEquals("Contig ;;;;;;;; bytes per line is not correct",789,ent.getBytesPerLine());
|
|
||||||
|
|
||||||
ent = sequenceIndexEntries.next();
|
|
||||||
Assert.assertEquals("Contig file:gi|17981852|ref|NC_001807.4| is not present","file:gi|17981852|ref|NC_001807.4|",ent.getContig());
|
|
||||||
Assert.assertEquals("Contig file:gi|17981852|ref|NC_001807.4| size is not correct",16571,ent.getSize());
|
|
||||||
Assert.assertEquals("Contig file:gi|17981852|ref|NC_001807.4| location is not correct",2911876801L,ent.getLocation());
|
|
||||||
Assert.assertEquals("Contig file:gi|17981852|ref|NC_001807.4| bases per line is not correct",70,ent.getBasesPerLine());
|
|
||||||
Assert.assertEquals("Contig file:gi|17981852|ref|NC_001807.4| bytes per line is not correct",71,ent.getBytesPerLine());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,272 +0,0 @@
|
||||||
package org.broadinstitute.sting.utils.fasta;
|
|
||||||
|
|
||||||
import org.junit.BeforeClass;
|
|
||||||
import org.junit.Before;
|
|
||||||
import org.junit.Test;
|
|
||||||
import org.junit.Assert;
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
|
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
|
||||||
import net.sf.picard.PicardException;
|
|
||||||
import net.sf.samtools.util.StringUtil;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test the indexed fasta sequence file reader.
|
|
||||||
*/
|
|
||||||
public class IndexedFastaSequenceFileUnitTest extends BaseTest {
|
|
||||||
private static String sequenceFileName;
|
|
||||||
private IndexedFastaSequenceFile sequenceFile = null;
|
|
||||||
|
|
||||||
private final String firstBasesOfChrM = "GATCACAGGTCTATCACCCT";
|
|
||||||
private final String extendedBasesOfChrM = "GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT" +
|
|
||||||
"TTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTG" +
|
|
||||||
"GAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATT";
|
|
||||||
private final String firstBasesOfChr1 = "taaccctaaccctaacccta";
|
|
||||||
private final String firstBasesOfChr8 = "GCAATTATGACACAAAAAAT";
|
|
||||||
|
|
||||||
@BeforeClass
|
|
||||||
public static void initialize() {
|
|
||||||
sequenceFileName = seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Before
|
|
||||||
public void doForEachTest() throws FileNotFoundException {
|
|
||||||
sequenceFile = new IndexedFastaSequenceFile( new File(sequenceFileName) );
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testOpenFile() {
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
Assert.assertNotNull( sequenceFile );
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
System.err.printf("testOpenFile runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFirstSequence() {
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",1,firstBasesOfChrM.length());
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", sequence.getContigIndex(), 0);
|
|
||||||
Assert.assertArrayEquals( "First n bases of chrM are incorrect",StringUtil.stringToBytes(firstBasesOfChrM),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testFirstSequence runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFirstSequenceExtended() {
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",1,extendedBasesOfChrM.length());
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", sequence.getContigIndex(), 0);
|
|
||||||
Assert.assertArrayEquals( "First n bases of chrM are incorrect",StringUtil.stringToBytes(extendedBasesOfChrM),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testFirstSequenceExtended runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testReadStartingInCenterOfFirstLine() {
|
|
||||||
final int bytesToChopOff = 5;
|
|
||||||
String truncated = extendedBasesOfChrM.substring(bytesToChopOff);
|
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",
|
|
||||||
bytesToChopOff + 1,
|
|
||||||
bytesToChopOff + truncated.length());
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", sequence.getContigIndex(), 0);
|
|
||||||
Assert.assertArrayEquals( "First n bases of chrM are incorrect",StringUtil.stringToBytes(truncated),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testReadStartingInCenterOfFirstLine runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testReadStartingInCenterOfMiddleLine() {
|
|
||||||
final int bytesToChopOff = 120;
|
|
||||||
String truncated = extendedBasesOfChrM.substring(bytesToChopOff);
|
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",
|
|
||||||
bytesToChopOff + 1,
|
|
||||||
bytesToChopOff + truncated.length());
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", sequence.getContigIndex(), 0);
|
|
||||||
Assert.assertArrayEquals( "First n bases of chrM are incorrect",StringUtil.stringToBytes(truncated),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testReadStartingInCenterOfMiddleLine runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFirstCompleteContigRead() {
|
|
||||||
ReferenceSequenceFile originalSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(sequenceFileName));
|
|
||||||
ReferenceSequence expectedSequence = originalSequenceFile.nextSequence();
|
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
ReferenceSequence sequence = sequenceFile.getSequence("chrM");
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", sequence.getContigIndex(), 0);
|
|
||||||
Assert.assertArrayEquals("chrM is incorrect",expectedSequence.getBases(),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testFirstCompleteContigRead runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test(expected= PicardException.class)
|
|
||||||
public void testReadThroughEndOfContig() {
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
try {
|
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",16500,16600);
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
System.err.printf("testReadThroughEndOfContig runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test(expected= PicardException.class)
|
|
||||||
public void testReadPastEndOfContig() {
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
try {
|
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",16800,16900);
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
System.err.printf("testReadPastEndOfContig runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testMiddleCompleteContigRead() {
|
|
||||||
ReferenceSequenceFile originalSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(sequenceFileName));
|
|
||||||
ReferenceSequence expectedSequence = originalSequenceFile.nextSequence();
|
|
||||||
while( !expectedSequence.getName().equals("chrY") )
|
|
||||||
expectedSequence = originalSequenceFile.nextSequence();
|
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
ReferenceSequence sequence = sequenceFile.getSequence("chrY");
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrY");
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", sequence.getContigIndex(), 24);
|
|
||||||
Assert.assertArrayEquals("chrY is incorrect",expectedSequence.getBases(),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testMiddleCompleteContigRead runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testLastCompleteContigRead() {
|
|
||||||
ReferenceSequenceFile originalSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(sequenceFileName));
|
|
||||||
ReferenceSequence expectedSequence = originalSequenceFile.nextSequence();
|
|
||||||
while( !expectedSequence.getName().equals("chrX_random") )
|
|
||||||
expectedSequence = originalSequenceFile.nextSequence();
|
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
ReferenceSequence sequence = sequenceFile.getSequence("chrX_random");
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrX_random");
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", sequence.getContigIndex(), 44);
|
|
||||||
Assert.assertArrayEquals("chrX_random is incorrect",expectedSequence.getBases(),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testLastCompleteContigRead runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFirstOfChr1() {
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr1",1,firstBasesOfChr1.length());
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chr1");
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", sequence.getContigIndex(), 1);
|
|
||||||
Assert.assertArrayEquals( "First n bases of chr1 are incorrect",StringUtil.stringToBytes(firstBasesOfChr1),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testFirstOfChr1 runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFirstOfChr8() {
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr8",1,firstBasesOfChr8.length());
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chr8");
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", sequence.getContigIndex(), 8);
|
|
||||||
Assert.assertArrayEquals( "First n bases of chr8 are incorrect",StringUtil.stringToBytes(firstBasesOfChr8),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testFirstOfChr8 runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFirstElementOfIterator() {
|
|
||||||
ReferenceSequenceFile originalSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(sequenceFileName));
|
|
||||||
ReferenceSequence expectedSequence = originalSequenceFile.nextSequence();
|
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
ReferenceSequence sequence = sequenceFile.nextSequence();
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", sequence.getName(), "chrM");
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", sequence.getContigIndex(), 0);
|
|
||||||
Assert.assertArrayEquals("chrM is incorrect",expectedSequence.getBases(),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testFirstElementOfIterator runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testNextElementOfIterator() {
|
|
||||||
ReferenceSequenceFile originalSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(sequenceFileName));
|
|
||||||
// Skip past the first one and load the second one.
|
|
||||||
originalSequenceFile.nextSequence();
|
|
||||||
ReferenceSequence expectedSequence = originalSequenceFile.nextSequence();
|
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
sequenceFile.nextSequence();
|
|
||||||
ReferenceSequence sequence = sequenceFile.nextSequence();
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", "chr1", sequence.getName());
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", 1, sequence.getContigIndex());
|
|
||||||
Assert.assertEquals("Sequence size is not correct", expectedSequence.length(), sequence.length());
|
|
||||||
Assert.assertArrayEquals("chr1 is incorrect",expectedSequence.getBases(),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testNextElementOfIterator runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testReset() {
|
|
||||||
ReferenceSequenceFile originalSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(sequenceFileName));
|
|
||||||
// Skip past the first one and load the second one.
|
|
||||||
ReferenceSequence expectedSequence = originalSequenceFile.nextSequence();
|
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
sequenceFile.nextSequence();
|
|
||||||
sequenceFile.nextSequence();
|
|
||||||
sequenceFile.reset();
|
|
||||||
ReferenceSequence sequence = sequenceFile.nextSequence();
|
|
||||||
long endTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
Assert.assertEquals("Sequence contig is not correct", "chrM", sequence.getName());
|
|
||||||
Assert.assertEquals("Sequence contig index is not correct", 0, sequence.getContigIndex());
|
|
||||||
Assert.assertEquals("Sequence size is not correct", expectedSequence.length(), sequence.length());
|
|
||||||
Assert.assertArrayEquals("chrM is incorrect", expectedSequence.getBases(),sequence.getBases());
|
|
||||||
|
|
||||||
System.err.printf("testReset runtime: %dms%n", (endTime - startTime)) ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -4,7 +4,6 @@ import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
|
@ -15,6 +14,7 @@ import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -66,11 +66,7 @@ public class GLFWriterUnitTest extends BaseTest {
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void beforeTests() {
|
public static void beforeTests() {
|
||||||
IndexedFastaSequenceFile seq;
|
IndexedFastaSequenceFile seq;
|
||||||
try {
|
|
||||||
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new StingException("unable to load the sequence dictionary");
|
|
||||||
}
|
|
||||||
GenomeLocParser.setupRefContigOrdering(seq);
|
GenomeLocParser.setupRefContigOrdering(seq);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@ package org.broadinstitute.sting.utils.genotype.vcf;
|
||||||
import org.broad.tribble.vcf.*;
|
import org.broad.tribble.vcf.*;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
|
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
|
@ -14,6 +13,8 @@ import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
|
|
@ -29,12 +30,8 @@ public class VCFWriterUnitTest extends BaseTest {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void beforeTests() {
|
public static void beforeTests() {
|
||||||
try {
|
|
||||||
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||||
GenomeLocParser.setupRefContigOrdering(seq);
|
GenomeLocParser.setupRefContigOrdering(seq);
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new StingException("unable to load the sequence dictionary");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** test, using the writer and reader, that we can output and input a VCF file without problems */
|
/** test, using the writer and reader, that we can output and input a VCF file without problems */
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue