1. Added functionality to the data sources to allow the engine to get a mapping from input files to the (merged) read group ids originating from those files.
2. Used said mapping to implement N-way-in,N-way-out functionality in the new indel cleaner. Still needs more testing (to be done after vacation but preliminary tests look good). 3. Fixes to VCF validator: ignore case when testing VCF reference base against true reference base and allow quals of -1 (as per spec). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2773 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
233a652161
commit
83b9d63d59
|
|
@ -378,12 +378,23 @@ public class GenomeAnalysisEngine {
|
|||
}
|
||||
|
||||
/**
 * Returns a mapping from each original input file to the set of
 * (merged) read group ids that originate from that file.
 *
 * Delegates to the underlying reads data source; the mapping reflects
 * read group ids as they appear in the merged SAM stream.
 *
 * @return the file-to-read-group-id mapping
 */
public Map<File, Set<String>> getFileToReadGroupIdMapping() {
    return getDataSource().getFileToReadGroupIdMapping();
}
|
||||
|
||||
/**
|
||||
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
|
||||
*
|
||||
* Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying
|
||||
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
|
||||
* returned by this method will contain 3 elements (one for each reader), with each element being a set of remapped read groups
|
||||
* (i.e. as seen by read.getReadGroup().getReadGroupId() in the merged stream) that come from the corresponding bam file.
|
||||
*
|
||||
* @return
|
||||
* @return sets of (merged) read group ids in order of input bams
|
||||
*/
|
||||
public List<Set<String>> getMergedReadGroupsByReaders() {
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
|
||||
|
|
@ -16,8 +17,8 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -111,6 +112,14 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
|
|||
return resourcePool.getHeader();
|
||||
}
|
||||
|
||||
/**
 * Returns a mapping from each original input file to the set of
 * (merged) read group ids that originate from that file.
 *
 * Delegates to the shared resource pool, which captures the mapping
 * when the header merger for the input bams is constructed.
 *
 * @return the file-to-read-group-id mapping
 */
public Map<File, Set<String>> getFileToReadGroupIdMapping() {
    return resourcePool.getFileToReadGroupIdMapping();
}
|
||||
|
||||
/**
|
||||
* Returns Reads data structure containing information about the reads data sources placed in this pool as well as
|
||||
|
|
|
|||
|
|
@ -33,8 +33,7 @@ import net.sf.samtools.SAMFileReader;
|
|||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
|
|
@ -75,6 +74,11 @@ class ReadStreamResource {
|
|||
*/
|
||||
private ReadStreamPointer readStreamPointer = null;
|
||||
|
||||
/**
|
||||
* A mapping from original input file to merged read group record ids
|
||||
*/
|
||||
private Map<File, Set<String>> fileToReadGroupIdMap = null;
|
||||
|
||||
public ReadStreamResource( Reads sourceInfo ) {
|
||||
SamFileHeaderMerger headerMerger = createHeaderMerger(sourceInfo, SAMFileHeader.SortOrder.coordinate);
|
||||
|
||||
|
|
@ -104,7 +108,7 @@ class ReadStreamResource {
|
|||
/**
|
||||
* Returns Reads data structure containing information about the reads data sources as well as
|
||||
* information about how they are downsampled, sorted, and filtered
|
||||
* @return
|
||||
* @return the Reads object
|
||||
*/
|
||||
public Reads getReadsInfo() { return readStreamPointer.getReadsInfo(); }
|
||||
|
||||
|
|
@ -112,7 +116,7 @@ class ReadStreamResource {
|
|||
* Returns header merger: a class that keeps the mapping between original read groups and read groups
|
||||
* of the merged stream; merger also provides access to the individual file readers (and hence headers
|
||||
* too) maintained by the system.
|
||||
* @return
|
||||
* @return the header merger
|
||||
*/
|
||||
public SamFileHeaderMerger getHeaderMerger() { return readStreamPointer.getHeaderMerger(); }
|
||||
|
||||
|
|
@ -134,11 +138,14 @@ class ReadStreamResource {
|
|||
return readStreamPointer.getReadsContainedBy(segment);
|
||||
}
|
||||
|
||||
|
||||
public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) {
|
||||
return readStreamPointer.getReadsOverlapping(segment);
|
||||
}
|
||||
|
||||
/**
 * Returns the mapping from each original input file to the set of
 * (merged) read group ids built while creating the header merger.
 *
 * NOTE(review): the backing field is declared null and is populated in
 * createHeaderMerger(...); this accessor returns null if called before
 * that initialization has run.
 *
 * @return the file-to-read-group-id mapping
 */
public Map<File, Set<String>> getFileToReadGroupIdMapping() {
    return fileToReadGroupIdMap;
}
|
||||
|
||||
/**
|
||||
* A private function that, given the internal file list, generates a merging construct for
|
||||
* all available files.
|
||||
|
|
@ -149,10 +156,13 @@ class ReadStreamResource {
|
|||
*/
|
||||
private SamFileHeaderMerger createHeaderMerger( Reads reads, SAMFileHeader.SortOrder SORT_ORDER )
|
||||
throws SimpleDataSourceLoadException {
|
||||
|
||||
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
||||
List<SAMFileReader> lst = new ArrayList<SAMFileReader>();
|
||||
Map<File, SAMFileReader> fileToReaderMap = new HashMap<File, SAMFileReader>();
|
||||
for (File f : reads.getReadsFiles()) {
|
||||
SAMFileReader reader = new SAMFileReader(f, eagerDecode);
|
||||
fileToReaderMap.put(f, reader);
|
||||
reader.setValidationStringency(reads.getValidationStringency());
|
||||
|
||||
final SAMFileHeader header = reader.getFileHeader();
|
||||
|
|
@ -169,6 +179,31 @@ class ReadStreamResource {
|
|||
|
||||
lst.add(reader);
|
||||
}
|
||||
return new SamFileHeaderMerger(lst,SORT_ORDER,true);
|
||||
|
||||
// create the header merger
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(lst,SORT_ORDER,true);
|
||||
|
||||
// populate the file -> read group mapping
|
||||
fileToReadGroupIdMap = new HashMap<File, Set<String>>();
|
||||
for (Map.Entry<File, SAMFileReader> entry : fileToReaderMap.entrySet()) {
|
||||
|
||||
Set<String> readGroups = new HashSet<String>(5);
|
||||
|
||||
for (SAMReadGroupRecord g : entry.getValue().getFileHeader().getReadGroups()) {
|
||||
if (headerMerger.hasReadGroupCollisions()) {
|
||||
// Check if there were read group clashes.
|
||||
// If there were, use the SamFileHeaderMerger to translate from the
|
||||
// original read group id to the read group id in the merged stream
|
||||
readGroups.add(headerMerger.getReadGroupId(entry.getValue(), g.getReadGroupId()));
|
||||
} else {
|
||||
// otherwise, pass through the unmapped read groups since this is what Picard does as well
|
||||
readGroups.add(g.getReadGroupId());
|
||||
}
|
||||
}
|
||||
|
||||
fileToReadGroupIdMap.put(entry.getKey(), readGroups);
|
||||
}
|
||||
|
||||
return headerMerger;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,8 @@ import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -111,6 +113,13 @@ public abstract class SAMDataSource implements SimpleDataSource {
|
|||
*/
|
||||
public Reads getReadsInfo() { return reads; }
|
||||
|
||||
/**
|
||||
* Returns a mapping from original input files to their (merged) read group ids
|
||||
*
|
||||
* @return the mapping
|
||||
*/
|
||||
public Map<File, Set<String>> getFileToReadGroupIdMapping() { return null; }
|
||||
|
||||
/**
|
||||
* Returns readers used by this data source.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
|||
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
|
|
@ -35,6 +34,9 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Maintain a pool of resources of accessors to SAM read data. SAMFileReaders and
|
||||
|
|
@ -50,6 +52,7 @@ class SAMResourcePool extends ResourcePool<ReadStreamResource, StingSAMIterator>
|
|||
/** Source information about the reads. */
|
||||
protected Reads reads;
|
||||
protected SamFileHeaderMerger headerMerger;
|
||||
protected Map<File, Set<String>> fileToReadGroupIdMap;
|
||||
|
||||
/**
|
||||
* Do all the constituent BAM files have indices? We support some very limited
|
||||
|
|
@ -71,7 +74,8 @@ class SAMResourcePool extends ResourcePool<ReadStreamResource, StingSAMIterator>
|
|||
this.header = streamResource.getHeader();
|
||||
this.headerMerger = streamResource.getHeaderMerger();
|
||||
this.hasIndex = streamResource.hasIndex();
|
||||
|
||||
this.fileToReadGroupIdMap = streamResource.getFileToReadGroupIdMapping();
|
||||
|
||||
// Add this resource to the pool.
|
||||
this.addNewResource(streamResource);
|
||||
}
|
||||
|
|
@ -88,13 +92,22 @@ class SAMResourcePool extends ResourcePool<ReadStreamResource, StingSAMIterator>
|
|||
*/
|
||||
public Reads getReadsInfo() { return reads; }
|
||||
|
||||
/**
|
||||
/**
|
||||
* Returns a mapping from original input files to their (merged) read group ids
|
||||
*
|
||||
* @return the mapping
|
||||
*/
|
||||
/**
 * Returns the file -> merged read group id mapping captured from the
 * underlying ReadStreamResource when this pool was constructed.
 *
 * @return the file-to-read-group-id mapping
 */
public Map<File, Set<String>> getFileToReadGroupIdMapping() {
    return fileToReadGroupIdMap;
}
|
||||
|
||||
/**
|
||||
* Returns header merger: a class that keeps the mapping between original read groups and read groups
|
||||
* of the merged stream; merger also provides access to the individual file readers (and hence headers
|
||||
* too) maintained by the system.
|
||||
* @return
|
||||
*/
|
||||
public SamFileHeaderMerger getHeaderMerger() { return headerMerger; }
|
||||
public SamFileHeaderMerger getHeaderMerger() { return headerMerger; }
|
||||
|
||||
protected ReadStreamResource selectBestExistingResource( DataStreamSegment segment, List<ReadStreamResource> resources ) {
|
||||
for (ReadStreamResource resource : resources) {
|
||||
|
|
|
|||
|
|
@ -24,30 +24,46 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
@Argument(fullName="LODThresholdForCleaning", shortName="LOD", doc="LOD threshold above which the cleaner will clean", required=false)
|
||||
protected double LOD_THRESHOLD = 5.0;
|
||||
@Argument(fullName="EntropyThreshold", shortName="entropy", doc="percentage of mismatches at a locus to be considered having high entropy", required=false)
|
||||
|
||||
@Argument(fullName="entropyThreshold", shortName="entropy", doc="percentage of mismatches at a locus to be considered having high entropy", required=false)
|
||||
protected double MISMATCH_THRESHOLD = 0.15;
|
||||
|
||||
@Argument(fullName="OutputBam", shortName="O", required=false, doc="Output bam")
|
||||
protected SAMFileWriter baseWriter = null;
|
||||
@Argument(fullName="output", shortName="O", required=false, doc="Output bam (or directory if using --NwayOutput)")
|
||||
protected String baseWriterFilename = null;
|
||||
|
||||
@Argument(fullName="NWayOutput", shortName="nway", required=false, doc="Should the reads be emitted in a separate bam file for each one of the input bams? [default:yes]")
|
||||
protected boolean NWAY_OUTPUT = true;
|
||||
|
||||
@Argument(fullName="outputSuffix", shortName="suffix", required=false, doc="Suffix to append to output bams (when using --NwayOutput) [default:'.cleaned']")
|
||||
protected String outputSuffix = ".cleaned";
|
||||
|
||||
@Argument(fullName="bam_compression", shortName="compress", required=false, doc="Compression level to use for output bams [default:5]")
|
||||
protected Integer compressionLevel = 5;
|
||||
|
||||
// ADVANCED OPTIONS FOLLOW
|
||||
|
||||
@Argument(fullName="outputIndels", shortName="indels", required=false, doc="Output file (text) for the indels found")
|
||||
protected String OUT_INDELS = null;
|
||||
|
||||
@Argument(fullName="OutputIndels", shortName="indels", required=false, doc="Output file (text) for the indels found")
|
||||
String OUT_INDELS = null;
|
||||
@Argument(fullName="statisticsFile", shortName="stats", doc="print out statistics (what does or doesn't get cleaned)", required=false)
|
||||
String OUT_STATS = null;
|
||||
protected String OUT_STATS = null;
|
||||
|
||||
@Argument(fullName="SNPsFile", shortName="snps", doc="print out whether mismatching columns do or don't get cleaned out", required=false)
|
||||
String OUT_SNPS = null;
|
||||
protected String OUT_SNPS = null;
|
||||
|
||||
@Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false)
|
||||
int MAX_CONSENSUSES = 30;
|
||||
protected int MAX_CONSENSUSES = 30;
|
||||
|
||||
@Argument(fullName="maxReadsForConsensuses", shortName="greedy", doc="max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)", required=false)
|
||||
int MAX_READS_FOR_CONSENSUSES = 120;
|
||||
protected int MAX_READS_FOR_CONSENSUSES = 120;
|
||||
|
||||
@Argument(fullName="maxReadsForRealignment", shortName="maxReads", doc="max reads allowed at an interval for realignment", required=false)
|
||||
int MAX_READS = 20000;
|
||||
protected int MAX_READS = 20000;
|
||||
|
||||
@Argument(fullName="writerWindowSize", shortName="writerWindowSize", doc="the window over which the writer will store reads", required=false)
|
||||
protected int SORTING_WRITER_WINDOW = 100;
|
||||
|
||||
|
||||
// the intervals input by the user
|
||||
private Iterator<GenomeLoc> intervals = null;
|
||||
|
||||
|
|
@ -59,7 +75,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
private ArrayList<SAMRecord> readsNotToClean = new ArrayList<SAMRecord>();
|
||||
|
||||
// the wrapper around the SAM writer
|
||||
private SortingSAMFileWriter writer = null;
|
||||
private Map<String, SortingSAMFileWriter> writers = null;
|
||||
|
||||
// random number generator
|
||||
private Random generator;
|
||||
|
|
@ -87,13 +103,36 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
if ( MISMATCH_THRESHOLD <= 0.0 || MISMATCH_THRESHOLD > 1.0 )
|
||||
throw new RuntimeException("Entropy threshold must be a fraction between 0 and 1");
|
||||
|
||||
// read in the intervals for cleaning
|
||||
List<GenomeLoc> locs = GenomeAnalysisEngine.parseIntervalRegion(Arrays.asList(intervalsFile));
|
||||
intervals = GenomeLocSortedSet.createSetFromList(locs).iterator();
|
||||
currentInterval = intervals.hasNext() ? intervals.next() : null;
|
||||
|
||||
if ( baseWriter != null )
|
||||
writer = new SortingSAMFileWriter(baseWriter, SORTING_WRITER_WINDOW);
|
||||
// set up the output writer(s)
|
||||
if ( baseWriterFilename != null ) {
|
||||
writers = new HashMap<String, SortingSAMFileWriter>();
|
||||
Map<File, Set<String>> readGroupMap = getToolkit().getFileToReadGroupIdMapping();
|
||||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
|
||||
if ( NWAY_OUTPUT ) {
|
||||
for ( File file : readGroupMap.keySet() ) {
|
||||
String newFileName = file.getName().substring(0, file.getName().length()-3) + outputSuffix + ".bam";
|
||||
SAMFileWriter baseWriter = factory.makeBAMWriter(getToolkit().getSAMFileHeader(), true, new File(baseWriterFilename, newFileName), compressionLevel);
|
||||
SortingSAMFileWriter writer = new SortingSAMFileWriter(baseWriter, SORTING_WRITER_WINDOW);
|
||||
for ( String rg : readGroupMap.get(file) )
|
||||
writers.put(rg, writer);
|
||||
}
|
||||
} else {
|
||||
SAMFileWriter baseWriter = factory.makeBAMWriter(getToolkit().getSAMFileHeader(), true, new File(baseWriterFilename), compressionLevel);
|
||||
SortingSAMFileWriter writer = new SortingSAMFileWriter(baseWriter, SORTING_WRITER_WINDOW);
|
||||
for ( Set<String> set : readGroupMap.values() ) {
|
||||
for ( String rg : set )
|
||||
writers.put(rg, writer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// set up the random generator
|
||||
generator = new Random(RANDOM_SEED);
|
||||
|
||||
if ( OUT_INDELS != null ) {
|
||||
|
|
@ -126,13 +165,40 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
private void emit(SAMRecord read) {
|
||||
if ( writer != null )
|
||||
writer.addAlignment(read);
|
||||
if ( writers != null ) {
|
||||
SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
if ( readGroup == null || readGroup.getReadGroupId() == null ) {
|
||||
if ( writers.size() > 1 )
|
||||
throw new StingException("There are multiple output writers but read " + read.toString() + " has no read group");
|
||||
writers.values().iterator().next().addAlignment(read);
|
||||
} else {
|
||||
writers.get(readGroup.getReadGroupId()).addAlignment(read);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void emit(List<SAMRecord> reads) {
|
||||
if ( writer != null )
|
||||
writer.addAlignments(reads);
|
||||
if ( writers == null )
|
||||
return;
|
||||
|
||||
// break out the reads into sets for their respective writers
|
||||
Map<SortingSAMFileWriter, Set<SAMRecord>> bins = new HashMap<SortingSAMFileWriter, Set<SAMRecord>>();
|
||||
for ( SortingSAMFileWriter writer : writers.values() )
|
||||
bins.put(writer, new HashSet<SAMRecord>());
|
||||
|
||||
for ( SAMRecord read : reads ) {
|
||||
SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
if ( readGroup == null || readGroup.getReadGroupId() == null ) {
|
||||
if ( writers.size() > 1 )
|
||||
throw new StingException("There are multiple output writers but read " + read.toString() + " has no read group");
|
||||
bins.get(writers.values().iterator().next()).add(read);
|
||||
} else {
|
||||
bins.get(writers.get(readGroup.getReadGroupId())).add(read);
|
||||
}
|
||||
}
|
||||
|
||||
for ( Map.Entry<SortingSAMFileWriter, Set<SAMRecord>> entry : bins.entrySet() )
|
||||
entry.getKey().addAlignments(entry.getValue());
|
||||
}
|
||||
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
|
|
@ -146,7 +212,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
if ( readLoc.getStop() == 0 )
|
||||
readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart());
|
||||
|
||||
if ( readLoc.isBefore(currentInterval) ) {
|
||||
if ( readLoc.isBefore(currentInterval) || Utils.is454Read(read) ) {
|
||||
emit(read);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -155,8 +221,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
read.getDuplicateReadFlag() ||
|
||||
read.getNotPrimaryAlignmentFlag() ||
|
||||
read.getMappingQuality() == 0 ||
|
||||
read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ||
|
||||
Utils.is454Read(read) ) {
|
||||
read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ) {
|
||||
readsNotToClean.add(read);
|
||||
} else {
|
||||
readsToClean.add(read, ref);
|
||||
|
|
@ -206,8 +271,16 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
emit(readsNotToClean);
|
||||
}
|
||||
|
||||
if ( writer != null )
|
||||
writer.close();
|
||||
if ( writers != null ) {
|
||||
for ( SortingSAMFileWriter writer : writers.values() ) {
|
||||
writer.close();
|
||||
|
||||
// TODO -- fix me
|
||||
try {
|
||||
writer.getBaseWriter().close();
|
||||
} catch (net.sf.samtools.util.RuntimeIOException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
if ( OUT_INDELS != null ) {
|
||||
try {
|
||||
|
|
@ -905,7 +978,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
private Cigar newCigar = null;
|
||||
private int newStart = -1;
|
||||
private int mismatchScoreToReference;
|
||||
private boolean updated = false;
|
||||
|
||||
public AlignedRead(SAMRecord read) {
|
||||
this.read = read;
|
||||
|
|
@ -978,14 +1050,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
read.setCigar(newCigar);
|
||||
read.setAlignmentStart(newStart);
|
||||
}
|
||||
updated = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean wasUpdated() {
|
||||
return updated;
|
||||
}
|
||||
|
||||
public void setMismatchScoreToReference(int score) {
|
||||
mismatchScoreToReference = score;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import net.sf.samtools.*;
|
|||
|
||||
import java.util.TreeSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
|
|
@ -66,14 +66,14 @@ public class SortingSAMFileWriter implements SAMFileWriter {
|
|||
/**
|
||||
* Add a list of reads to the writer for emission; the reads do NOT need to be sorted
|
||||
*
|
||||
* @param reads the reads to emit
|
||||
* @param reads the reads to emit
|
||||
*/
|
||||
public void addAlignments(List<SAMRecord> reads) {
|
||||
public void addAlignments(Collection<SAMRecord> reads) {
|
||||
if ( reads.size() == 0 )
|
||||
return;
|
||||
|
||||
// at a new contig, clear the cache
|
||||
if ( cachedReads.size() > 0 && cachedReads.first().getReferenceIndex() < reads.get(0).getReferenceIndex() )
|
||||
if ( cachedReads.size() > 0 && cachedReads.first().getReferenceIndex() < reads.iterator().next().getReferenceIndex() )
|
||||
clearCache();
|
||||
|
||||
cachedReads.addAll(reads);
|
||||
|
|
@ -96,19 +96,26 @@ public class SortingSAMFileWriter implements SAMFileWriter {
|
|||
}
|
||||
|
||||
/**
 * Gets the SAM file header, taken from the underlying (base) writer.
 *
 * @return the SAM file header
 */
public SAMFileHeader getFileHeader() {
    return baseWriter.getFileHeader();
}
|
||||
|
||||
/**
 * Closes this writer by flushing the sorted read cache to the base writer.
 * The underlying SAMFileWriter is intentionally NOT closed here; a caller
 * that owns the base writer (see getBaseWriter()) must close it separately.
 */
public void close() {
    clearCache();
}
|
||||
|
||||
/**
 * Provides access to the wrapped writer, e.g. so a caller can close it
 * after close() has flushed this sorting wrapper's cache.
 *
 * @return the underlying SAMFileWriter
 */
public SAMFileWriter getBaseWriter() {
    return baseWriter;
}
|
||||
|
||||
private void clearCache() {
|
||||
Iterator<SAMRecord> iter = cachedReads.iterator();
|
||||
while ( iter.hasNext() )
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
/**
|
||||
* A light-weight validator for a VCF file.
|
||||
*/
|
||||
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= RodVCF.class))
|
||||
@Requires(value={},referenceMetaData=@RMD(name="vcf", type=RodVCF.class))
|
||||
public class VCFValidator extends RodWalker<Integer, Integer> {
|
||||
|
||||
/**
|
||||
|
|
@ -28,7 +28,7 @@ public class VCFValidator extends RodWalker<Integer, Integer> {
|
|||
RODRecordList<ReferenceOrderedDatum> rodlist = tracker.getTrackData("vcf", null);
|
||||
if ( rodlist != null ) {
|
||||
RodVCF rod = (RodVCF)rodlist.getRecords().get(0);
|
||||
if ( (rod.isSNP() || rod.isReference()) && rod.getReference().charAt(0) != ref.getBase() )
|
||||
if ( (rod.isSNP() || rod.isReference()) && Character.toUpperCase(rod.getReference().charAt(0)) != Character.toUpperCase(ref.getBase()) )
|
||||
throw new StingException("The reference base (" + ref.getBase() + ") does not match the base from the VCF record (" + rod.getReference() + ")");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -484,8 +484,8 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
}
|
||||
|
||||
public void setQual(double qual) {
|
||||
if (qual < 0)
|
||||
throw new IllegalArgumentException("Qual values must be greater than 0");
|
||||
if ( qual < 0 && MathUtils.compareDoubles(qual, -1.0) != 0 )
|
||||
throw new IllegalArgumentException("Qual values cannot be negative unless they are -1 ('unknown')");
|
||||
mQual = qual;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue