diff --git a/build.xml b/build.xml index 9a66d4699..d3e25d424 100644 --- a/build.xml +++ b/build.xml @@ -955,8 +955,8 @@ - - + + diff --git a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java index d1d616c97..1f873ffbd 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java @@ -25,8 +25,10 @@ package org.broadinstitute.sting.commandline; import com.google.java.contract.Requires; +import org.broad.tribble.AbstractFeatureReader; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; +import org.broad.tribble.FeatureReader; import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; @@ -77,27 +79,15 @@ public final class IntervalBinding { if ( featureIntervals != null ) { intervals = new ArrayList(); - //RMDTrackBuilder builder = new RMDTrackBuilder(toolkit.getReferenceDataSource().getReference().getSequenceDictionary(), - // toolkit.getGenomeLocParser(), - // toolkit.getArguments().unsafe); - // TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files final FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec(); if ( codec instanceof ReferenceDependentFeatureCodec ) ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser()); try { - final FileInputStream fis = new FileInputStream(new File(featureIntervals.getSource())); - final AsciiLineReader lineReader = new AsciiLineReader(fis); - codec.readHeader(lineReader); - String line = lineReader.readLine(); - while ( line != null ) { - final Feature feature = codec.decodeLoc(line); - if ( feature == null ) - throw new UserException.MalformedFile(featureIntervals.getSource(), "Couldn't parse line '" + line + "'"); + FeatureReader reader = AbstractFeatureReader.getFeatureReader(featureIntervals.getSource(), codec, false); + for ( Feature feature : reader.iterator() ) intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(feature)); - line = lineReader.readLine(); - } } catch (Exception e) { throw new UserException.MalformedFile(featureIntervals.getSource(), "Problem reading the interval file", e); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java index fdc3d2aa7..ebfef5dc1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java @@ -27,13 +27,12 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.picard.util.PeekableIterator; import net.sf.samtools.GATKBAMFileSpan; import net.sf.samtools.GATKChunk; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileSpan; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -265,7 +264,10 @@ public class BAMScheduler implements Iterator { // Naive algorithm: find all elements in current contig for proper schedule creation. List lociInContig = new LinkedList(); for(GenomeLoc locus: loci) { - if(!GenomeLoc.isUnmapped(locus) && dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded) + if (!GenomeLoc.isUnmapped(locus) && dataSource.getHeader().getSequence(locus.getContig()) == null) + throw new ReviewedStingException("BAM file(s) do not have the contig: " + locus.getContig() + ". You are probably using a different reference than the one this file was aligned with"); + + if (!GenomeLoc.isUnmapped(locus) && dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded) lociInContig.add(locus); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java index bf3ce352a..641bddb2d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadFilter.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; @@ -16,4 +17,18 @@ public abstract class ReadFilter implements SamRecordFilter { * @param engine the engine. */ public void initialize(GenomeAnalysisEngine engine) {} + + + /** + * Determines whether a pair of SAMRecord matches this filter + * + * @param first the first SAMRecord to evaluate + * @param second the second SAMRecord to evaluate + * + * @return true if the SAMRecords matches the filter, otherwise false + * @throws UnsupportedOperationException when paired filter not implemented + */ + public boolean filterOut(final SAMRecord first, final SAMRecord second) { + throw new UnsupportedOperationException("Paired filter not implemented: " + this.getClass()); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java index 4ca7b935f..25d339ee6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.io.storage; import net.sf.samtools.util.BlockCompressedOutputStream; import org.apache.log4j.Logger; -import org.broad.tribble.source.BasicFeatureSource; +import org.broad.tribble.AbstractFeatureReader; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; @@ -114,7 +114,7 @@ public class VCFWriterStorage implements Storage, VCFWriter { String sourceFilePath = file.getAbsolutePath(); String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin"; logger.debug(String.format("Merging %s into %s",sourceFilePath,targetFilePath)); - BasicFeatureSource source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false); + AbstractFeatureReader source = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false); for ( VariantContext vc : source.iterator() ) { target.writer.add(vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceDependentFeatureCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceDependentFeatureCodec.java index b4427c228..d24686ad7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceDependentFeatureCodec.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceDependentFeatureCodec.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; * A HACK. Tribble should contain all the information in needs to decode the unqualified position of * a feature. */ -public interface ReferenceDependentFeatureCodec extends FeatureCodec { +public interface ReferenceDependentFeatureCodec { /** * Sets the appropriate GenomeLocParser, providing additional context when decoding larger and more variable features. * @param genomeLocParser The parser to supply. diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java index 7aa112961..96bc874e0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -26,11 +26,10 @@ package org.broadinstitute.sting.gatk.refdata.tracks; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.util.CloseableIterator; import org.apache.log4j.Logger; +import org.broad.tribble.AbstractFeatureReader; +import org.broad.tribble.CloseableTribbleIterator; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; -import org.broad.tribble.FeatureSource; -import org.broad.tribble.iterators.CloseableTribbleIterator; -import org.broad.tribble.source.PerformanceLoggingFeatureSource; import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.utils.GenomeLoc; @@ -57,7 +56,7 @@ public class RMDTrack { private final File file; // the associated file we create the reader from // our feature reader - allows queries - private FeatureSource reader; + private AbstractFeatureReader reader; // our sequence dictionary, which can be null private final SAMSequenceDictionary dictionary; @@ -92,7 +91,7 @@ public class RMDTrack { * @param dict the sam sequence dictionary * @param codec the feature codec we use to decode this type */ - public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) { + public RMDTrack(Class type, String name, File file, AbstractFeatureReader reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) { this.type = type; this.name = name; this.file = file; @@ -116,8 +115,6 @@ public class RMDTrack { public CloseableIterator query(GenomeLoc interval) throws IOException { CloseableTribbleIterator iter = reader.query(interval.getContig(),interval.getStart(),interval.getStop()); - if ( RMDTrackBuilder.MEASURE_TRIBBLE_QUERY_PERFORMANCE ) - logger.warn("Query " + getName() + ":" + ((PerformanceLoggingFeatureSource)reader).getPerformanceLog()); return new FeatureToGATKFeatureIterator(genomeLocParser, iter, this.getName()); } @@ -130,10 +127,6 @@ public class RMDTrack { reader = null; } - public FeatureSource getReader() { - return reader; - } - /** * get the sequence dictionary from the track, if available * @return a SAMSequenceDictionary if available, null if unavailable diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index edb514984..2a83b9d34 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -26,14 +26,12 @@ package org.broadinstitute.sting.gatk.refdata.tracks; import net.sf.samtools.SAMSequenceDictionary; import org.apache.log4j.Logger; +import org.broad.tribble.AbstractFeatureReader; import org.broad.tribble.FeatureCodec; -import org.broad.tribble.FeatureSource; import org.broad.tribble.Tribble; import org.broad.tribble.TribbleException; import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; -import org.broad.tribble.source.BasicFeatureSource; -import org.broad.tribble.source.PerformanceLoggingFeatureSource; import org.broad.tribble.util.LittleEndianOutputStream; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; @@ -121,7 +119,7 @@ public class RMDTrackBuilder { // extends PluginManager { throw new UserException.BadArgumentValue("-B",fileDescriptor.getType()); // return a feature reader track - Pair pair; + Pair pair; if (inputFile.getAbsolutePath().endsWith(".gz")) pair = createTabixIndexedFeatureSource(descriptor, name, inputFile); else @@ -155,11 +153,11 @@ public class RMDTrackBuilder { // extends PluginManager { * @param inputFile the file to load * @return a feature reader implementation */ - private Pair createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) { + private Pair createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) { // we might not know the index type, try loading with the default reader constructor logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file"); try { - return new Pair(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null); + return new Pair(AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null); } catch (TribbleException e) { throw new UserException(e.getMessage(), e); } @@ -183,12 +181,12 @@ public class RMDTrackBuilder { // extends PluginManager { * @param storageType How the RMD is streamed into the input file. * @return the input file as a FeatureReader */ - private Pair getFeatureSource(FeatureManager.FeatureDescriptor descriptor, + private Pair getFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile, RMDStorageType storageType) { // Feature source and sequence dictionary to use as the ultimate reference - FeatureSource featureSource = null; + AbstractFeatureReader featureSource = null; SAMSequenceDictionary sequenceDictionary = null; // Detect whether or not this source should be indexed. @@ -215,10 +213,7 @@ public class RMDTrackBuilder { // extends PluginManager { sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index); } - if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE ) - featureSource = new PerformanceLoggingFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name)); - else - featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name)); + featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name), index); } catch (TribbleException e) { throw new UserException(e.getMessage()); @@ -228,10 +223,10 @@ public class RMDTrackBuilder { // extends PluginManager { } } else { - featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(descriptor, name),false); + featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name), false); } - return new Pair(featureSource,sequenceDictionary); + return new Pair(featureSource,sequenceDictionary); } /** @@ -358,7 +353,7 @@ public class RMDTrackBuilder { // extends PluginManager { private Index createIndexInMemory(File inputFile, FeatureCodec codec) { // this can take a while, let them know what we're doing logger.info("Creating Tribble index in memory for file " + inputFile); - Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); + Index idx = IndexFactory.createDynamicIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict); return idx; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java index 104ba87b5..cfc1c36c6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java @@ -24,8 +24,8 @@ package org.broadinstitute.sting.gatk.refdata.utils; import net.sf.samtools.util.CloseableIterator; +import org.broad.tribble.CloseableTribbleIterator; import org.broad.tribble.Feature; -import org.broad.tribble.iterators.CloseableTribbleIterator; import org.broadinstitute.sting.utils.GenomeLocParser; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 057dba1f7..3127f20ac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -54,8 +54,8 @@ import java.util.*; */ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { - private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; - private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), + public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; + public static final VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") }; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index 57561a277..0d2b3478d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; @@ -16,10 +17,7 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** @@ -33,8 +31,12 @@ import java.util.Map; public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { private static final int MIN_SAMPLES = 10; + private Set founderIds; public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + //If available, get the founder IDs and cache them. the IC will only be computed on founders then. + if(founderIds == null) + founderIds = ((Walker)walker).getSampleDB().getFounderIds(); return calculateIC(vc); } @@ -43,7 +45,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno } private Map calculateIC(final VariantContext vc) { - final GenotypesContext genotypes = vc.getGenotypes(); + final GenotypesContext genotypes = (founderIds == null || founderIds.isEmpty()) ? vc.getGenotypes() : vc.getGenotypes(founderIds); if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 24a107235..51b834bd2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -29,7 +29,7 @@ import java.util.Map; public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if ( stratifiedContexts.size() == 0 ) + if ( !vc.hasLog10PError() || stratifiedContexts.size() == 0 ) return null; final GenotypesContext genotypes = vc.getGenotypes(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index c5aabc64d..e2d2a3d1f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -64,20 +64,20 @@ public class ContextCovariate implements StandardCovariate { } @Override - public CovariateValues getValues(GATKSAMRecord read) { + public CovariateValues getValues(final GATKSAMRecord read) { int l = read.getReadLength(); BitSet[] mismatches = new BitSet[l]; BitSet[] insertions = new BitSet[l]; BitSet[] deletions = new BitSet[l]; - read = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context + GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context - final boolean negativeStrand = read.getReadNegativeStrandFlag(); - byte[] bases = read.getReadBases(); + final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag(); + byte[] bases = clippedRead.getReadBases(); if (negativeStrand) bases = BaseUtils.simpleReverseComplement(bases); - for (int i = 0; i < read.getReadLength(); i++) { + for (int i = 0; i < clippedRead.getReadLength(); i++) { mismatches[i] = contextWith(bases, i, mismatchesContextSize); insertions[i] = contextWith(bases, i, insertionsContextSize); deletions[i] = contextWith(bases, i, deletionsContextSize); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index c9a6cb8f2..ce79c7138 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -25,15 +25,16 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; import org.apache.log4j.Logger; +import org.broad.tribble.AbstractFeatureReader; +import org.broad.tribble.FeatureReader; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; +import java.io.*; +import java.util.Iterator; import java.util.Map; @@ -56,16 +57,14 @@ public class VCFDiffableReader implements DiffableReader { DiffNode root = DiffNode.rooted(file.getName()); try { // read the version line from the file - LineReader lineReader = new AsciiLineReader(new FileInputStream(file)); - final String version = lineReader.readLine(); + BufferedReader br = new BufferedReader(new FileReader(file)); + final String version = br.readLine(); root.add("VERSION", version); - lineReader.close(); - - lineReader = new AsciiLineReader(new FileInputStream(file)); - VCFCodec vcfCodec = new VCFCodec(); + br.close(); // must be read as state is stored in reader itself - VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader); + FeatureReader reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false); + VCFHeader header = (VCFHeader)reader.getHeader(); for ( VCFHeaderLine headerLine : header.getMetaData() ) { String key = headerLine.getKey(); if ( headerLine instanceof VCFIDHeaderLine) @@ -76,14 +75,14 @@ public class VCFDiffableReader implements DiffableReader { root.add(key, headerLine.toString()); } - String line = lineReader.readLine(); int count = 0, nRecordsAtPos = 1; String prevName = ""; - while ( line != null ) { + Iterator it = reader.iterator(); + while ( it.hasNext() ) { if ( count++ > maxElementsToRead && maxElementsToRead != -1) break; - VariantContext vc = (VariantContext)vcfCodec.decode(line); + VariantContext vc = it.next(); String name = vc.getChr() + ":" + vc.getStart(); if ( name.equals(prevName) ) { name += "_" + ++nRecordsAtPos; @@ -121,10 +120,9 @@ public class VCFDiffableReader implements DiffableReader { } root.add(vcRoot); - line = lineReader.readLine(); } - lineReader.close(); + reader.close(); } catch ( IOException e ) { return null; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 8cf078e6e..fef34d5fd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -82,6 +82,9 @@ public class PhaseByTransmission extends RodWalker, HashMa @Argument(shortName = "prior",required = false,fullName = "DeNovoPrior", doc="Prior for de novo mutations. Default: 1e-8") private double deNovoPrior=1e-8; + @Argument(shortName = "fatherAlleleFirst",required = false,fullName = "FatherAlleleFirst", doc="Ouputs the father allele as the first allele in phased child genotype. i.e. father|mother rather than mother|father.") + private boolean fatherFAlleleFirst=false; + @Output protected VCFWriter vcfWriter = null; @@ -183,12 +186,15 @@ public class PhaseByTransmission extends RodWalker, HashMa ArrayList parentPhasedAlleles = new ArrayList(2); ArrayList childPhasedAlleles = new ArrayList(2); - //If there is a possible phasing between the mother and child => phase + //If there is a possible phasing between the parent and child => phase int childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(0)); if(childTransmittedAlleleIndex > -1){ trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentAlleles, Genotype.NO_LOG10_PERROR, null, null, true)); childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex)); - childPhasedAlleles.add(childAlleles.get(0)); + if(parent.equals(FamilyMember.MOTHER)) + childPhasedAlleles.add(childAlleles.get(0)); + else + childPhasedAlleles.add(0,childAlleles.get(0)); trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true)); } else if((childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(1))) > -1){ @@ -196,7 +202,10 @@ public class PhaseByTransmission extends RodWalker, HashMa parentPhasedAlleles.add(parentAlleles.get(0)); trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true)); childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex)); - childPhasedAlleles.add(childAlleles.get(0)); + if(parent.equals(FamilyMember.MOTHER)) + childPhasedAlleles.add(childAlleles.get(0)); + else + childPhasedAlleles.add(0,childAlleles.get(0)); trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true)); } //This is a Mendelian Violation => Do not phase @@ -296,6 +305,14 @@ public class PhaseByTransmission extends RodWalker, HashMa else{ phaseFamilyAlleles(mother, father, child); } + + //If child should phased genotype should be father first, then swap the alleles + if(fatherFAlleleFirst && trioPhasedGenotypes.get(FamilyMember.CHILD).isPhased()){ + ArrayList childAlleles = new ArrayList(trioPhasedGenotypes.get(FamilyMember.CHILD).getAlleles()); + childAlleles.add(childAlleles.remove(0)); + trioPhasedGenotypes.put(FamilyMember.CHILD,new Genotype(DUMMY_NAME,childAlleles,Genotype.NO_LOG10_PERROR,null,null,true)); + } + } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 18b8424b2..a1654ec8a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; @@ -189,6 +190,8 @@ public class CombineVariants extends RodWalker { Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); if ( SET_KEY != null ) headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants")); + if ( !ASSUME_IDENTICAL_SAMPLES ) + headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions)); VCFHeader vcfHeader = new VCFHeader(headerLines, sitesOnlyVCF ? Collections.emptySet() : samples); vcfHeader.setWriteCommandLine(!SUPPRESS_COMMAND_LINE_HEADER); vcfWriter.writeHeader(vcfHeader); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java index e4768fd5b..ecd396bf2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java @@ -25,16 +25,14 @@ package org.broadinstitute.sting.utils.codecs.beagle; */ +import org.broad.tribble.AsciiFeatureCodec; import org.broad.tribble.Feature; import org.broad.tribble.exception.CodecLineParsingException; -import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; @@ -63,7 +61,7 @@ import java.util.regex.Pattern; * @author Mark DePristo * @since 2010 */ -public class BeagleCodec implements ReferenceDependentFeatureCodec { +public class BeagleCodec extends AsciiFeatureCodec implements ReferenceDependentFeatureCodec { private String[] header; public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2}; private BeagleReaderType readerType; @@ -80,25 +78,16 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec getFeatureType() { - return BeagleFeature.class; - } - public BeagleFeature decode(String line) { String[] tokens; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java index 8bdb24b6c..916fb43ea 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java @@ -24,8 +24,7 @@ package org.broadinstitute.sting.utils.codecs.hapmap; -import org.broad.tribble.AbstractFeatureCodec; -import org.broad.tribble.Feature; +import org.broad.tribble.AsciiFeatureCodec; import org.broad.tribble.annotation.Strand; import org.broad.tribble.readers.LineReader; @@ -71,18 +70,14 @@ import java.util.Arrays; * @author Mark DePristo * @since 2010 */ -public class RawHapMapCodec extends AbstractFeatureCodec { +public class RawHapMapCodec extends AsciiFeatureCodec { // the minimum number of features in the HapMap file line private static final int minimumFeatureCount = 11; private String headerLine; - /** - * decode the location only - * @param line the input line to decode - * @return a HapMapFeature - */ - public Feature decodeLoc(String line) { - return decode(line); + + public RawHapMapCodec() { + super(RawHapMapFeature.class); } /** @@ -90,7 +85,7 @@ public class RawHapMapCodec extends AbstractFeatureCodec { * @param line the input line to decode * @return a HapMapFeature, with the given fields */ - public Feature decode(String line) { + public RawHapMapFeature decode(String line) { String[] array = line.split("\\s+"); // make sure the split was successful - that we got an appropriate number of fields @@ -113,10 +108,6 @@ public class RawHapMapCodec extends AbstractFeatureCodec { headerLine); } - public Class getFeatureType() { - return RawHapMapFeature.class; - } - public Object readHeader(LineReader reader) { try { headerLine = reader.readLine(); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java index cb392f29c..736c989c6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.utils.codecs.refseq; +import org.broad.tribble.AsciiFeatureCodec; import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; -import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -46,13 +46,18 @@ import java.util.ArrayList; * @author Mark DePristo * @since 2010 */ -public class RefSeqCodec implements ReferenceDependentFeatureCodec { +public class RefSeqCodec extends AsciiFeatureCodec implements ReferenceDependentFeatureCodec { /** * The parser to use when resolving genome-wide locations. */ private GenomeLocParser genomeLocParser; private boolean zero_coding_length_user_warned = false; + + public RefSeqCodec() { + super(RefSeqFeature.class); + } + /** * Set the parser to use when resolving genetic data. * @param genomeLocParser The supplied parser. @@ -130,17 +135,4 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec getFeatureType() { - return RefSeqFeature.class; - } - - public boolean canDecode(final String potentialInput) { return false; } - } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java index d9f16c353..f4ba185b4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java @@ -25,10 +25,9 @@ package org.broadinstitute.sting.utils.codecs.sampileup; -import org.broad.tribble.AbstractFeatureCodec; +import org.broad.tribble.AsciiFeatureCodec; import org.broad.tribble.Feature; import org.broad.tribble.exception.CodecLineParsingException; -import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; import java.util.ArrayList; @@ -76,7 +75,7 @@ import static org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature.V * @author Matt Hanna * @since 2009 */ -public class SAMPileupCodec extends AbstractFeatureCodec { +public class SAMPileupCodec extends AsciiFeatureCodec { // the number of tokens we expect to parse from a pileup line private static final int expectedTokenCount = 10; private static final char fldDelim = '\t'; @@ -88,24 +87,8 @@ public class SAMPileupCodec extends AbstractFeatureCodec { private static final String baseT = "T"; private static final String emptyStr = ""; // we will use this for "reference" allele in insertions - /** - * Return the # of header lines for this file. - * - * @param reader the line reader - * @return 0 in this case, we assume no header lines. - */ - public Object readHeader(LineReader reader) { - // we don't require a header line, but it may exist. We'll deal with that above. - return null; - } - - @Override - public Class getFeatureType() { - return SAMPileupFeature.class; - } - - public Feature decodeLoc(String line) { - return decode(line); + public SAMPileupCodec() { + super(SAMPileupFeature.class); } public SAMPileupFeature decode(String line) { @@ -285,5 +268,4 @@ public class SAMPileupCodec extends AbstractFeatureCodec { feature.setPileupBases(baseBuilder.toString()); feature.setPileupQuals(qualBuilder.toString()); } - } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java index 0f2b94e63..4459c44f2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java @@ -27,10 +27,9 @@ package org.broadinstitute.sting.utils.codecs.samread; import net.sf.samtools.Cigar; import net.sf.samtools.TextCigarCodec; import net.sf.samtools.util.StringUtil; -import org.broad.tribble.AbstractFeatureCodec; +import org.broad.tribble.AsciiFeatureCodec; import org.broad.tribble.Feature; import org.broad.tribble.exception.CodecLineParsingException; -import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; /** @@ -52,31 +51,14 @@ import org.broad.tribble.util.ParsingUtils; * @author Matt Hanna * @since 2009 */ -public class SAMReadCodec extends AbstractFeatureCodec { +public class SAMReadCodec extends AsciiFeatureCodec { /* SL-XBC:1:10:628:923#0 16 Escherichia_coli_K12 1 37 76M = 1 0 AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB */ // the number of tokens we expect to parse from a read line private static final int expectedTokenCount = 11; - /** - * Return the # of header lines for this file. - * - * @param reader the line reader - * @return 0 in this case, we assume no header lines. The reads file may have a - * header line beginning with '@', but we can ignore that in the decode function. - */ - public Object readHeader(LineReader reader) { - // we don't require a header line, but it may exist. We'll deal with that above. - return null; - } - - @Override - public Class getFeatureType() { - return SAMReadFeature.class; - } - - public Feature decodeLoc(String line) { - return decode(line); + public SAMReadCodec() { + super(SAMReadFeature.class); } /** @@ -131,6 +113,4 @@ public class SAMReadCodec extends AbstractFeatureCodec { bases, qualities); } - - } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java index fdcc8ed10..5937d1f1f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java @@ -23,7 +23,7 @@ import java.util.Arrays; public class BedTableCodec extends TableCodec implements ReferenceDependentFeatureCodec { @Override - public Feature decode(String line) { + public TableFeature decode(String line) { if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter) || line.startsWith(igvHeaderDelimiter)) return null; String[] split = line.split(delimiterRegex); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java index aa6d7d345..22d754098 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.utils.codecs.table; -import org.broad.tribble.Feature; +import org.broad.tribble.AsciiFeatureCodec; import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -39,7 +39,7 @@ import java.util.Arrays; * @author Mark DePristo * @since 2009 */ -public class TableCodec implements ReferenceDependentFeatureCodec { +public class TableCodec extends AsciiFeatureCodec implements ReferenceDependentFeatureCodec { final static protected String delimiterRegex = "\\s+"; final static protected String headerDelimiter = "HEADER"; final static protected String igvHeaderDelimiter = "track"; @@ -52,6 +52,10 @@ public class TableCodec implements ReferenceDependentFeatureCodec { */ protected GenomeLocParser genomeLocParser; + public TableCodec() { + super(TableFeature.class); + } + /** * Set the parser to use when resolving genetic data. * @param genomeLocParser The supplied parser. @@ -61,14 +65,8 @@ public class TableCodec implements ReferenceDependentFeatureCodec { this.genomeLocParser = genomeLocParser; } - @Override - public Feature decodeLoc(String line) { - return decode(line); - } - - @Override - public Feature decode(String line) { + public TableFeature decode(String line) { if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter) || line.startsWith(igvHeaderDelimiter)) return null; String[] split = line.split(delimiterRegex); @@ -77,11 +75,6 @@ public class TableCodec implements ReferenceDependentFeatureCodec { return new TableFeature(genomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header); } - @Override - public Class getFeatureType() { - return TableFeature.class; - } - @Override public Object readHeader(LineReader reader) { String line = ""; @@ -106,7 +99,4 @@ public class TableCodec implements ReferenceDependentFeatureCodec { } return header; } - - public boolean canDecode(final String potentialInput) { return false; } - } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 7d39dc789..66ed908eb 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.utils.codecs.vcf; import org.apache.log4j.Logger; +import org.broad.tribble.AsciiFeatureCodec; import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.NameAwareCodec; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; @@ -10,14 +10,20 @@ import org.broad.tribble.util.BlockCompressedInputStream; import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.LazyGenotypesContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; -import java.io.*; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; import java.util.*; import java.util.zip.GZIPInputStream; -public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { +public abstract class AbstractVCFCodec extends AsciiFeatureCodec implements NameAwareCodec { public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20); protected final static Logger log = Logger.getLogger(VCFCodec.class); @@ -61,6 +67,10 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { protected Map stringCache = new HashMap(); + protected AbstractVCFCodec() { + super(VariantContext.class); + } + /** * Creates a LazyParser for a LazyGenotypesContext to use to decode * our genotypes only when necessary. We do this instead of eagarly @@ -266,7 +276,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { * @param line the line * @return a VariantContext */ - public Feature decode(String line) { + public VariantContext decode(String line) { // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null; @@ -378,14 +388,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { return vc; } - /** - * - * @return the type of record - */ - public Class getFeatureType() { - return VariantContext.class; - } - /** * get the name of this codec * @return our set name diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/IndexingVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/IndexingVCFWriter.java index 71ec4ce1b..0cca77e4a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/IndexingVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/IndexingVCFWriter.java @@ -28,12 +28,10 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.Tribble; -import org.broad.tribble.TribbleException; import org.broad.tribble.index.DynamicIndexCreator; import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; import org.broad.tribble.util.LittleEndianOutputStream; -import org.broad.tribble.util.PositionalStream; import org.broadinstitute.sting.gatk.refdata.tracks.IndexDictionaryUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -142,3 +140,31 @@ public abstract class IndexingVCFWriter implements VCFWriter { } } } + +class PositionalStream extends OutputStream { + OutputStream out = null; + private long position = 0; + + public PositionalStream(OutputStream out) { + this.out = out; + } + + public void write(final byte[] bytes) throws IOException { + write(bytes, 0, bytes.length); + } + + public void write(final byte[] bytes, int startIndex, int numBytes) throws IOException { + //System.out.println("write: " + bytes + " " + numBytes); + position += numBytes; + out.write(bytes, startIndex, numBytes); + } + + public void write(int c) throws IOException { + System.out.println("write byte: " + c); + //System.out.printf("Position %d for %c\n", position, (char)c); + position++; + out.write(c); + } + + public long getPosition() { return position; } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java index ac1da7110..75b396bd9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java @@ -108,12 +108,17 @@ public class StandardVCFWriter extends IndexingVCFWriter { // write out the column line mWriter.write(VCFHeader.HEADER_INDICATOR); + boolean isFirst = true; for ( VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields() ) { + if ( isFirst ) + isFirst = false; // don't write out a field separator + else + mWriter.write(VCFConstants.FIELD_SEPARATOR); mWriter.write(field.toString()); - mWriter.write(VCFConstants.FIELD_SEPARATOR); } if ( mHeader.hasGenotypingData() ) { + mWriter.write(VCFConstants.FIELD_SEPARATOR); mWriter.write("FORMAT"); for ( String sample : mHeader.getGenotypeSamples() ) { mWriter.write(VCFConstants.FIELD_SEPARATOR); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java index dd0a333f3..006913959 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java @@ -24,7 +24,7 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine { * @param line the header line * @param version the vcf header version */ - protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) { + public VCFFilterHeaderLine(String line, VCFHeaderVersion version) { super(line, version, "FILTER", Arrays.asList("ID", "Description")); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java index 474c8dd14..5fc3187c5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java @@ -20,7 +20,7 @@ public class VCFFormatHeaderLine extends VCFCompoundHeaderLine { super(name, count, type, description, SupportedHeaderLineType.FORMAT); } - protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) { + public VCFFormatHeaderLine(String line, VCFHeaderVersion version) { super(line, version, SupportedHeaderLineType.FORMAT); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index 50ff3a656..20f71b956 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -112,7 +112,7 @@ public class VCFHeader { * @param genotypeSampleNamesInAppearenceOrder genotype sample names */ - protected void buildVCFReaderMaps(List genotypeSampleNamesInAppearenceOrder) { + public void buildVCFReaderMaps(List genotypeSampleNamesInAppearenceOrder) { sampleNamesInOrder = new ArrayList(genotypeSampleNamesInAppearenceOrder.size()); sampleNameToOffset = new HashMap(genotypeSampleNamesInAppearenceOrder.size()); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java index 9b20f38a1..9f249c531 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java @@ -17,7 +17,7 @@ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine { super(name, count, type, description, SupportedHeaderLineType.INFO); } - protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) { + public VCFInfoHeaderLine(String line, VCFHeaderVersion version) { super(line, version, SupportedHeaderLineType.INFO); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java index 05d603073..e0a057eec 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java @@ -48,7 +48,7 @@ public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLin * @param key the key for this header line * @param expectedTagOrdering the tag ordering expected for this header line */ - protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, String key, List expectedTagOrdering) { + public VCFSimpleHeaderLine(String line, VCFHeaderVersion version, String key, List expectedTagOrdering) { super(key, ""); Map mapping = VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering); name = mapping.get("ID"); diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index f477fedc9..1c5f8431c 100755 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -76,7 +76,7 @@ public class WalkerTest extends BaseTest { public static void assertOnDiskIndexEqualToNewlyCreatedIndex(final File indexFile, final String name, final File resultFile) { System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile); - Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec()); + Index indexFromOutputFile = IndexFactory.createDynamicIndex(resultFile, new VCFCodec()); Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath()); if ( ! indexFromOutputFile.equalsIgnoreProperties(dynamicIndex) ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java index 1b2a6e82e..e7169321c 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java @@ -24,7 +24,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_1.vcf"), 1, - Arrays.asList("89a1c56f264ac27a2a4be81072473b6f")); + Arrays.asList("444a20659f67592a8284e0b7849e4302")); executeTest("Test symbolic alleles", spec); } @@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), 1, - Arrays.asList("3008d6f5044bc14801e5c58d985dec72")); + Arrays.asList("93a24c019663a6011b4d6de12538df11")); executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 7a0d78b88..a3cd2d39f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -54,7 +54,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoAnnotsNotAsking2() { - // this genotype annotations in this file are actually out of order. If you don't parse the genotypes + // the genotype annotations in this file are actually out of order. If you don't parse the genotypes // they don't get reordered. It's a good test of the genotype ordering system. WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, @@ -126,6 +126,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("getting DB tag with HM3", spec); } + @Test + public void testNoQuals() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " --variant " + validationDataLocation + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + validationDataLocation + "noQual.vcf -A QualByDepth", 1, + Arrays.asList("e531c9f90c17f0f859cd1ac851a8edd8")); + executeTest("test file doesn't have QUALs", spec); + } + @Test public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( @@ -144,7 +152,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testTabixAnnotations() { - final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf"; + final String MD5 = "bb9a148716fc69d706c5be146c1afa00"; for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -A HomopolymerRun --variant:vcf " + validationDataLocation + file + " -L " + validationDataLocation + "CEU.exon.2010_03.sites.vcf -NO_HEADER", 1, @@ -198,4 +206,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("Testing ChromosomeCounts annotation with PED file", spec); } + @Test + public void testInbreedingCoeffPed() { + final String MD5 = "7f1314fada5cb1f35ba1996f8a7a686b"; + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" + + " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1, + Arrays.asList(MD5)); + executeTest("Testing InbreedingCoeff annotation with PED file", spec); + } + } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 464dfb06e..d031d393c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -172,7 +172,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameter() { HashMap e = new HashMap(); - e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" ); + e.put( "-sites_only", "446ed9a72d210671ed48aa1f572b77e3" ); e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "ecf92054c1e4bd9d6529b8002d385165" ); e.put( "--output_mode EMIT_ALL_SITES", "e10819a2a7960254e27ed2b958b45d56" ); @@ -376,7 +376,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("973178b97efd2daacc9e45c414275d59")); + Arrays.asList("1e4595c85159bd9b62eff575134b5dd9")); executeTest("test minIndelFraction 0.0", spec); } @@ -384,7 +384,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("220facd2eb0923515d1d8ab874055564")); + Arrays.asList("6d9dbb949626fdb0bca6a7904e9b0c1f")); executeTest("test minIndelFraction 0.25", spec); } @@ -392,7 +392,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction100() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 1", 1, - Arrays.asList("50fe9a4c5633f6395b45d9ec1e00d56a")); + Arrays.asList("a31a355bc2b8257b2a45494e97322694")); executeTest("test minIndelFraction 1.0", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 0ff6fc244..040845828 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -9,13 +9,13 @@ import java.util.Map; public class IndelRealignerIntegrationTest extends WalkerTest { - private static final String mainTestBam = validationDataLocation + "indelRealignerTest.pilot1.ceu.fixed.bam"; + private static final String mainTestBam = validationDataLocation + "indelRealignerTest.pilot1.ceu.fixed.fixmates.bam"; private static final String mainTestIntervals = validationDataLocation + "indelRealignerTest.pilot1.ceu.intervals"; private static final String knownIndels = validationDataLocation + "indelRealignerTest.pilot1.ceu.vcf"; private static final String baseCommandPrefix = "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + mainTestBam + " -targetIntervals " + mainTestIntervals + " -compress 0 -L 20:49,500-55,500 "; private static final String baseCommand = baseCommandPrefix + "-o %s "; - private static final String base_md5 = "e041186bca9dccf360747c89be8417ad"; - private static final String base_md5_with_SW_or_VCF = "d7c7acd346ee4c8d34f1e2499ff7c313"; + private static final String base_md5 = "7574ab7d0b1ee5d44a0b3f85b6e944e6"; + private static final String base_md5_with_SW_or_VCF = "a918d69d26d3c87b29002ed31f428c48"; @Test public void testDefaults() { @@ -38,7 +38,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { WalkerTestSpec spec1 = new WalkerTestSpec( baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -known " + knownIndels, 1, - Arrays.asList("3dd5d2c9931b375455af0bff1a2c4888")); + Arrays.asList("36718f10d523dfb0fa2a709480f24bd4")); executeTest("realigner known indels only from VCF", spec1); } @@ -55,7 +55,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { public void testLods() { HashMap e = new HashMap(); e.put( "-LOD 60", base_md5 ); - e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "f158f18198cf48bbb3d4f1b7127928a3" ); + e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "9a75a0f7ad0442c78d0f8df260e733a4" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -71,7 +71,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10,000,000-11,000,000 -targetIntervals " + validationDataLocation + "indelRealignerTest.NA12878.chrom1.intervals -compress 0 -o %s", 1, - Arrays.asList("fe39c007d287d372a8137d11c60fbc50")); + Arrays.asList("e98f51d71f0a82141b36a7e9f94db237")); executeTest("realigner long run", spec); } @@ -80,7 +80,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseCommand + "--noOriginalAlignmentTags --consensusDeterminationModel USE_SW", 1, - Arrays.asList("e77e59cc6363cf58f392ce5ea8d7e0b6")); + Arrays.asList("58ac675d0699eb236d469b8e84513d11")); executeTest("realigner no output tags", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java index 2cd76e7a5..48be3264f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("16fefda693156eadf1481fd9de23facb","9418a7a6405b78179ca13a67b8bfcc14") + Arrays.asList("d54a142d68dca54e478c13f9a0e4c95c","1a37fcc93a73429f9065b942ab771233") ); executeTest("testTrueNegativeMV", spec); } @@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("14cf1d21a54d8b9fb506df178b634c56","efc66ae3d036715b721f9bd35b65d556") + Arrays.asList("883ea7fd2b200c4b7fa95a4f7aa15931","7b1f5309c3d4f4aa7e9061f288dceb68") ); executeTest("testTruePositiveMV", spec); } @@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("f9b0fae9fe1e0f09b883a292b0e70a12","398724bc1e65314cc5ee92706e05a3ee") + Arrays.asList("e812d62a3449b74b6948ee7deb8a0790","d00922496759e84c66a4b5e222e36997") ); executeTest("testFalsePositiveMV", spec); } @@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("b8d1aa3789ce77b45430c62d13ee3006","a1a333e08fafb288cda0e7711909e1c3") + Arrays.asList("e3c572f933a40e1878a2cfa52049517a","60e4f0be344fb944ab3378f9ab27da64") ); executeTest("testSpecialCases", spec); } @@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("7201ce7cc47db5840ac6b647709f7c33","c11b5e7cd7459d90d0160f917eff3b1e") + Arrays.asList("b42af3b73a2cb38cfc92f8047dd686b3","a69c3f9c005e852b44c29ab25e87ba0d") ); executeTest("testPriorOption", spec); } @@ -128,9 +128,30 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("398724bc1e65314cc5ee92706e05a3ee") + Arrays.asList("d00922496759e84c66a4b5e222e36997") ); executeTest("testMVFileOption", spec); } + //Test when running with the fatherAlleleFirst option + @Test + public void testFatherAlleleFirst() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T PhaseByTransmission", + "-NO_HEADER", + "-R " + b37KGReference, + "--variant " + TPTest, + "-ped "+ goodFamilyFile, + "-L 1:10109-10315", + "-mvf %s", + "-o %s", + "-fatherAlleleFirst" + ), + 2, + Arrays.asList("c158a3816357597543ef85c4478c41e8","4f8daca19c8f31bd87850c124f91e330") + ); + executeTest("testFatherAlleleFirst", spec); + } + } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationSiteSelectorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationSiteSelectorIntegrationTest.java index d7c866a0a..1a4de3e87 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationSiteSelectorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationSiteSelectorIntegrationTest.java @@ -33,7 +33,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(sampleNone + freqUnif + "--variant " + testfile), 1, - Arrays.asList("d49baeb8000a426c172ce1d81eb37963") + Arrays.asList("6a9e990a9252840904b5144213915b32") ); executeTest("testNoSampleSelectionFreqUniform--" + testfile, spec); @@ -45,7 +45,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(sampleNone + freqAF + "--variant " + testfile), 1, - Arrays.asList("0fb0d015d462c34514fc7e96beea5f56") + Arrays.asList("eaa2385086cddff68cf4fdb81cbdbbb9") ); executeTest("testNoSampleSelectionFreqAF--" + testfile, spec); @@ -57,7 +57,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(sampleGT + freqUnif + "--variant " + testfile), 1, - Arrays.asList("0672854299d42ea8af906976a3849ae6") + Arrays.asList("24077656f590d6905546f7e019c8dccb") ); executeTest("testPolyGTFreqUniform--" + testfile, spec); @@ -69,7 +69,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(sampleGT + freqAF + "--variant " + testfile), 1, - Arrays.asList("5bdffda1a063d0bddd6b236854ec627d") + Arrays.asList("3c1180fd9b5e80e540b39c5a95fbe722") ); executeTest("testPolyGTFreqAF--" + testfile, spec); @@ -81,7 +81,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(sampleGL + freqAF + "--variant " + testfile), 1, - Arrays.asList("35ef16aa41303606a4b94f7b88bd9aa8") + Arrays.asList("ad30c028864348204ebe80b9c8c503e8") ); executeTest("testPolyGLFreqAF--" + testfile, spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 879a5bfa3..79a58c820 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -27,8 +27,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", "0ddd1e0e483d2eaf56004615cea23ec7", // tranches - "f8e21a1987960b950db1f0d98be45352", // recal file - "f67d844b6252a55452cf4167b77530b1"); // cut VCF + "a45a78de049cfe767ce23d3423f80b01", // recal file + "1050c387d170639f8cec221e5dddd626"); // cut VCF @DataProvider(name = "VRTest") public Object[][] createData1() { @@ -74,8 +74,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf", "da4458d05f6396f5c4ab96f274e5ccdc", // tranches - "cf380d9b0ae04c8918be8425f82035b4", // recal file - "b00e5e5a6807df8ed1682317948e8a6d"); // cut VCF + "918a5ecad5a2a8a46795144366683188", // recal file + "bf0e8ed5e250d52f0545074c61217d16"); // cut VCF @DataProvider(name = "VRIndelTest") public Object[][] createData2() { @@ -131,7 +131,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { " -o %s" + " -tranchesFile " + validationDataLocation + "VQSR.mixedTest.tranches" + " -recalFile " + validationDataLocation + "VQSR.mixedTest.recal", - Arrays.asList("08060b7f5c9cf3bb1692b50c58fd5a4b")); + Arrays.asList("9039576b63728df7ee2c881817c0e9eb")); executeTest("testApplyRecalibrationSnpAndIndelTogether", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 5282c9e58..649a76941 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -78,26 +78,26 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); } - @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ea0a660cd04101ce7b534aba0310721d"); } - @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "cb0350e7a9d2483993482b69f5432b64", " -setKey foo"); } - @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "0571c48cc59cf244779caae52d562e79", " -setKey null"); } - @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0"); } // official project VCF files in tabix format + @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c1e82f0842ca721d10f21604f26a5248"); } + @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "b2fcf3983cc9e667b9bbed8372080776", " -setKey foo"); } + @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "98c0cbb94e5debf7545a656665a1b659", " -setKey null"); } + @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "10170f9e72cc831a5820bd03e70fe46a"); } // official project VCF files in tabix format - @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "75901304abc1daa41b1906f881aa7bbc"); } - @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e"); } + @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "074e909f80ffcc9fddc3fac89ea36bef"); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f26980af214011c0452b8ce843f3063b"); } - @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "d08e933b6c81246e998d3ece50ddfdcc"); } + @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "7c337c8752abeffb0c9a4ee35d1a1451"); } - @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "01967686e0e02dbccd2590b70f2d049b"); } // official project VCF files in tabix format - @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "8c113199c4a93a4a408104b735d59044"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "30e96a0cb614cd5bc056e1f7ec6d10bd"); } + @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "06e86711bcf0efc0f0c4a378f6147cf6"); } // official project VCF files in tabix format + @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "03103f6b39e9fb7a396df0013f01fae6"); } // official project VCF files in tabix format + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "12fc1b8145f7884762f0c2cbbd319ae1"); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "7e2dba80ba38b2a86713f635d630eb59"); } - @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "78a49597f1abf1c738e67d50c8fbed2b"); } + @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "63fc20d6223e1387563a1164987d716c"); } - @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4c63bfa5f73793aaca42e130ec49f238"); } - @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "86e326acbd8d2af8a6040eb146d92fc6"); } + @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "5c60eb8d5d4b957a0cf52ca008f021ba"); } + @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "774b43e69cc7ec93090b4f6e9f4a1079"); } @Test public void threeWayWithRefs() { WalkerTestSpec spec = new WalkerTestSpec( @@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("ee43a558fd3faeaa447acab89f0001d5")); + Arrays.asList("988f9d294a8ff4278e40e76a72200bf4")); executeTest("threeWayWithRefs", spec); } @@ -127,17 +127,17 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); } - @Test public void complexTestFull() { combineComplexSites("", "2842337e9943366f7a4d5f148f701b8c"); } - @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "39724318e6265d0318a3fe4609612785"); } - @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "fe9bb02ab8b3d0dd2ad6373ebdb6d915"); } - @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "fe9bb02ab8b3d0dd2ad6373ebdb6d915"); } + @Test public void complexTestFull() { combineComplexSites("", "dd805f6edfc3cf724512dfbbe8df5183"); } + @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "14a205edb022f79abf1863588cfee56b"); } + @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "e118d04d1d47c02ad38c046561a9f616"); } + @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "e118d04d1d47c02ad38c046561a9f616"); } @Test public void combineDBSNPDuplicateSites() { WalkerTestSpec spec = new WalkerTestSpec( "-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132, 1, - Arrays.asList("5969446769cb8377daa2db29304ae6b5")); + Arrays.asList("a838dc241cf357466cd4331fd298c73a")); executeTest("combineDBSNPDuplicateSites:", spec); } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java index a64d0b5ab..464dcd807 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; @@ -63,7 +64,7 @@ public class CombineVariantsUnitTest { private VCFHeader createHeader(String headerStr) { VCFCodec codec = new VCFCodec(); - VCFHeader head = (VCFHeader)codec.readHeader(new AsciiLineReader(new StringBufferInputStream(headerStr))); + VCFHeader head = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr)))); return head; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java index 2139a53e7..942bedc82 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java @@ -40,7 +40,7 @@ public class LeftAlignVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LeftAlignVariants -o %s -R " + b37KGReference + " --variant:vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER", 1, - Arrays.asList("158b1d71b28c52e2789f164500b53732")); + Arrays.asList("8e0991576518823b339a4e2f83299d4f")); executeTest("test left alignment", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index f969d8752..1308cb1e6 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -136,7 +136,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s -NO_HEADER", 1, - Arrays.asList("20b52c96f5c48258494d072752b53693") + Arrays.asList("ffa2524380d84a870d2e4a33d9f3d31a") ); executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java index 95fafac8d..b56d4ef87 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java @@ -19,7 +19,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testVariantsToVCFUsingDbsnpInput() { List md5 = new ArrayList(); - md5.add("d64942fed2a5b7b407f9537dd2b4832e"); + md5.add("a26afcce2a89f905a49c3d09719586b2"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java index 5fd4c610e..914783ca8 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/hapmap/HapMapUnitTest.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.codecs.hapmap; import org.broad.tribble.annotation.Strand; import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.readers.PositionalBufferedStream; import org.testng.Assert; import org.testng.annotations.Test; @@ -152,7 +153,7 @@ public class HapMapUnitTest { public AsciiLineReader getReader() { try { - return new AsciiLineReader(new FileInputStream(hapMapFile)); + return new AsciiLineReader(new PositionalBufferedStream(new FileInputStream(hapMapFile))); } catch (FileNotFoundException e) { Assert.fail("Unable to open hapmap file : " + hapMapFile); } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java index 55bd4783b..8da11c3d7 100755 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.utils.codecs.vcf; import net.sf.samtools.SAMSequenceDictionary; +import org.broad.tribble.AbstractFeatureReader; +import org.broad.tribble.CloseableTribbleIterator; import org.broad.tribble.Tribble; import org.broad.tribble.index.*; -import org.broad.tribble.iterators.CloseableTribbleIterator; -import org.broad.tribble.source.BasicFeatureSource; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -45,14 +45,14 @@ public class IndexFactoryUnitTest extends BaseTest { // @Test public void testOnTheFlyIndexing1() throws IOException { - Index indexFromInputFile = IndexFactory.createIndex(inputFile, new VCFCodec()); + Index indexFromInputFile = IndexFactory.createDynamicIndex(inputFile, new VCFCodec()); if ( outputFileIndex.exists() ) { System.err.println("Deleting " + outputFileIndex); outputFileIndex.delete(); } for ( int maxRecords : Arrays.asList(0, 1, 10, 100, 1000, -1)) { - BasicFeatureSource source = new BasicFeatureSource(inputFile.getAbsolutePath(), indexFromInputFile, new VCFCodec()); + AbstractFeatureReader source = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), new VCFCodec(), indexFromInputFile); int counter = 0; VCFWriter writer = new StandardVCFWriter(outputFile, dict); @@ -66,7 +66,7 @@ public class IndexFactoryUnitTest extends BaseTest { // test that the input index is the same as the one created from the identical input file // test that the dynamic index is the same as the output index, which is equal to the input index - WalkerTest.assertOnDiskIndexEqualToNewlyCreatedIndex(outputFileIndex, "unittest", outputFile); + //WalkerTest.assertOnDiskIndexEqualToNewlyCreatedIndex(outputFileIndex, "unittest", outputFile); } } } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index b7bbae68d..30e86af34 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -35,7 +35,7 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("76075307afd26b4db6234795d9fb3c2f")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("acee3b6bdb4b759992f54065c675a249")); executeTest("Test reading and writing breakpoint VCF", spec1); } diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java index 14e63191d..a6bfffab1 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.Assert; @@ -24,7 +25,7 @@ public class VCFHeaderUnitTest extends BaseTest { private VCFHeader createHeader(String headerStr) { VCFCodec codec = new VCFCodec(); - VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new StringBufferInputStream(headerStr))); + VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr)))); Assert.assertEquals(header.getMetaData().size(), VCF4headerStringCount); return header; } diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index 96a33b738..7059cdced 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -1,7 +1,10 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broad.tribble.AbstractFeatureReader; +import org.broad.tribble.FeatureReader; import org.broad.tribble.Tribble; import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -59,16 +62,10 @@ public class VCFWriterUnitTest extends BaseTest { writer.add(createVC(header)); writer.add(createVC(header)); writer.close(); - VCFCodec reader = new VCFCodec(); - AsciiLineReader lineReader; + VCFCodec codec = new VCFCodec(); VCFHeader headerFromFile = null; - try { - lineReader = new AsciiLineReader(new FileInputStream(fakeVCFFile)); - headerFromFile = (VCFHeader)reader.readHeader(lineReader); - } - catch (FileNotFoundException e ) { - throw new ReviewedStingException(e.getMessage()); - } + FeatureReader reader = AbstractFeatureReader.getFeatureReader(fakeVCFFile.getAbsolutePath(), codec, false); + headerFromFile = (VCFHeader)reader.getHeader(); int counter = 0; @@ -76,12 +73,9 @@ public class VCFWriterUnitTest extends BaseTest { validateHeader(headerFromFile); try { - while(true) { - String line = lineReader.readLine(); - if (line == null) - break; - - VariantContext vc = (VariantContext)reader.decode(line); + Iterator it = reader.iterator(); + while(it.hasNext()) { + VariantContext vc = it.next(); counter++; } Assert.assertEquals(counter, 2); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java index a71949369..e38466beb 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java @@ -78,30 +78,31 @@ public class VariantContextBenchmark extends SimpleBenchmark { private GenomeLocParser b37GenomeLocParser; @Override protected void setUp() { - try { - ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.b37KGReference)); - b37GenomeLocParser = new GenomeLocParser(seq); - } catch ( FileNotFoundException e) { - throw new RuntimeException(e); - } - - // read it into a String so that we don't try to benchmark IO issues - try { - FileInputStream s = new FileInputStream(new File(vcfFile)); - AsciiLineReader lineReader = new AsciiLineReader(s); - int counter = 0; - StringBuffer sb = new StringBuffer(); - while (counter++ < linesToRead ) { - String line = lineReader.readLine(); - if ( line == null ) - break; - sb.append(line + "\n"); - } - s.close(); - INPUT_STRING = sb.toString(); - } catch (IOException e) { - throw new RuntimeException(e); - } + // TODO -- update for new tribble interface +// try { +// ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.b37KGReference)); +// b37GenomeLocParser = new GenomeLocParser(seq); +// } catch ( FileNotFoundException e) { +// throw new RuntimeException(e); +// } +// +// // read it into a String so that we don't try to benchmark IO issues +// try { +// FileInputStream s = new FileInputStream(new File(vcfFile)); +// AsciiLineReader lineReader = new AsciiLineReader(s); +// int counter = 0; +// StringBuffer sb = new StringBuffer(); +// while (counter++ < linesToRead ) { +// String line = lineReader.readLine(); +// if ( line == null ) +// break; +// sb.append(line + "\n"); +// } +// s.close(); +// INPUT_STRING = sb.toString(); +// } catch (IOException e) { +// throw new RuntimeException(e); +// } } private interface FunctionToBenchmark { @@ -109,23 +110,24 @@ public class VariantContextBenchmark extends SimpleBenchmark { } private void runBenchmark(FeatureCodec codec, FunctionToBenchmark func) { - try { - InputStream is = new ByteArrayInputStream(INPUT_STRING.getBytes()); - AsciiLineReader lineReader = new AsciiLineReader(is); - codec.readHeader(lineReader); - - int counter = 0; - while (counter++ < linesToRead ) { - String line = lineReader.readLine(); - if ( line == null ) - break; - - T vc = codec.decode(line); - func.run(vc); - } - } catch (Exception e) { - System.out.println("Benchmarking run failure because of " + e.getMessage()); - } + // TODO -- update for new Tribble interface +// try { +// InputStream is = new ByteArrayInputStream(INPUT_STRING.getBytes()); +// AsciiLineReader lineReader = new AsciiLineReader(is); +// codec.readHeader(lineReader); +// +// int counter = 0; +// while (counter++ < linesToRead ) { +// String line = lineReader.readLine(); +// if ( line == null ) +// break; +// +// T vc = codec.decode(line); +// func.run(vc); +// } +// } catch (Exception e) { +// System.out.println("Benchmarking run failure because of " + e.getMessage()); +// } } public void timeV14(int rep) { diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala b/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala index f2592e05c..1f18858e1 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala @@ -4,14 +4,16 @@ import java.io.File import org.apache.commons.io.FilenameUtils import scala.io.Source._ import net.sf.samtools.SAMFileReader -import org.broad.tribble.source.BasicFeatureSource import org.broadinstitute.sting.utils.codecs.vcf.{VCFHeader, VCFCodec} import scala.collection.JavaConversions._ +import org.broad.tribble.{FeatureCodec, AbstractFeatureReader} +import org.broadinstitute.sting.utils.variantcontext.VariantContext object VCF_BAM_utilities { def getSamplesFromVCF(vcfFile: File): List[String] = { - return BasicFeatureSource.getFeatureSource(vcfFile.getPath(), new VCFCodec()).getHeader().asInstanceOf[VCFHeader].getGenotypeSamples().toList + val codec: FeatureCodec[VariantContext] = new VCFCodec().asInstanceOf[FeatureCodec[VariantContext]] + AbstractFeatureReader.getFeatureReader(vcfFile.getPath, codec).getHeader.asInstanceOf[VCFHeader].getGenotypeSamples.toList } def getSamplesInBAM(bam: File): List[String] = { diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala index 7e1d09b70..95455e812 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala @@ -42,7 +42,7 @@ class DataProcessingPipelineTest { " -nv ", " -test ", " -p " + projectName).mkString - spec.fileMD5s += testOut -> "1f85e76de760167a77ed1d9ab4da2936" + spec.fileMD5s += testOut -> "0de95b5642e41e11ecd6fa1770242b88" PipelineTest.executeTest(spec) } @@ -62,7 +62,7 @@ class DataProcessingPipelineTest { " -bwa /home/unix/carneiro/bin/bwa", " -bwape ", " -p " + projectName).mkString - spec.fileMD5s += testOut -> "57416a0abdf9524bc92834d466529708" + spec.fileMD5s += testOut -> "72beeb037bfc5a07599630a23d8b325b" PipelineTest.executeTest(spec) } diff --git a/settings/repository/edu.mit.broad/picard-private-parts-2181.jar b/settings/repository/edu.mit.broad/picard-private-parts-2181.jar deleted file mode 100644 index ef3371827..000000000 Binary files a/settings/repository/edu.mit.broad/picard-private-parts-2181.jar and /dev/null differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-2375.jar b/settings/repository/edu.mit.broad/picard-private-parts-2375.jar new file mode 100644 index 000000000..bfa2f65ad Binary files /dev/null and b/settings/repository/edu.mit.broad/picard-private-parts-2375.jar differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-2181.xml b/settings/repository/edu.mit.broad/picard-private-parts-2375.xml similarity index 64% rename from settings/repository/edu.mit.broad/picard-private-parts-2181.xml rename to settings/repository/edu.mit.broad/picard-private-parts-2375.xml index d11423b59..b467f934a 100644 --- a/settings/repository/edu.mit.broad/picard-private-parts-2181.xml +++ b/settings/repository/edu.mit.broad/picard-private-parts-2375.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/net.sf/picard-1.59.1066.xml b/settings/repository/net.sf/picard-1.59.1066.xml deleted file mode 100644 index 73bc3ffee..000000000 --- a/settings/repository/net.sf/picard-1.59.1066.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.59.1066.jar b/settings/repository/net.sf/picard-1.67.1197.jar similarity index 62% rename from settings/repository/net.sf/picard-1.59.1066.jar rename to settings/repository/net.sf/picard-1.67.1197.jar index 1bbfd5a19..9243c02df 100644 Binary files a/settings/repository/net.sf/picard-1.59.1066.jar and b/settings/repository/net.sf/picard-1.67.1197.jar differ diff --git a/settings/repository/net.sf/picard-1.67.1197.xml b/settings/repository/net.sf/picard-1.67.1197.xml new file mode 100644 index 000000000..7d9042d6b --- /dev/null +++ b/settings/repository/net.sf/picard-1.67.1197.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.59.1066.xml b/settings/repository/net.sf/sam-1.59.1066.xml deleted file mode 100644 index 75a327daa..000000000 --- a/settings/repository/net.sf/sam-1.59.1066.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.59.1066.jar b/settings/repository/net.sf/sam-1.67.1197.jar similarity index 78% rename from settings/repository/net.sf/sam-1.59.1066.jar rename to settings/repository/net.sf/sam-1.67.1197.jar index 8380da864..8a8343cfa 100644 Binary files a/settings/repository/net.sf/sam-1.59.1066.jar and b/settings/repository/net.sf/sam-1.67.1197.jar differ diff --git a/settings/repository/net.sf/sam-1.67.1197.xml b/settings/repository/net.sf/sam-1.67.1197.xml new file mode 100644 index 000000000..d43aba4ed --- /dev/null +++ b/settings/repository/net.sf/sam-1.67.1197.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/org.broad/tribble-53.jar b/settings/repository/org.broad/tribble-101.jar similarity index 51% rename from settings/repository/org.broad/tribble-53.jar rename to settings/repository/org.broad/tribble-101.jar index 02865df43..9e81f9eb2 100644 Binary files a/settings/repository/org.broad/tribble-53.jar and b/settings/repository/org.broad/tribble-101.jar differ diff --git a/settings/repository/org.broad/tribble-101.xml b/settings/repository/org.broad/tribble-101.xml new file mode 100644 index 000000000..09d13e43a --- /dev/null +++ b/settings/repository/org.broad/tribble-101.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/org.broad/tribble-53.xml b/settings/repository/org.broad/tribble-53.xml deleted file mode 100644 index cae6cf15a..000000000 --- a/settings/repository/org.broad/tribble-53.xml +++ /dev/null @@ -1,3 +0,0 @@ - - -