Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-05-08 09:34:33 -04:00
commit a4f4b5007b
64 changed files with 358 additions and 351 deletions

View File

@ -955,8 +955,8 @@
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
<jvmarg line="${cofoja.jvm.args}"/>
<!-- <jvmarg value="-Xdebug"/> -->
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
<!-- <jvmarg value="-Xdebug"/> -->
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false">

View File

@ -25,8 +25,10 @@
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
@ -77,27 +79,15 @@ public final class IntervalBinding<T extends Feature> {
if ( featureIntervals != null ) {
intervals = new ArrayList<GenomeLoc>();
//RMDTrackBuilder builder = new RMDTrackBuilder(toolkit.getReferenceDataSource().getReference().getSequenceDictionary(),
// toolkit.getGenomeLocParser(),
// toolkit.getArguments().unsafe);
// TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files
final FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec();
if ( codec instanceof ReferenceDependentFeatureCodec )
((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser());
try {
final FileInputStream fis = new FileInputStream(new File(featureIntervals.getSource()));
final AsciiLineReader lineReader = new AsciiLineReader(fis);
codec.readHeader(lineReader);
String line = lineReader.readLine();
while ( line != null ) {
final Feature feature = codec.decodeLoc(line);
if ( feature == null )
throw new UserException.MalformedFile(featureIntervals.getSource(), "Couldn't parse line '" + line + "'");
FeatureReader<Feature> reader = AbstractFeatureReader.getFeatureReader(featureIntervals.getSource(), codec, false);
for ( Feature feature : reader.iterator() )
intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(feature));
line = lineReader.readLine();
}
} catch (Exception e) {
throw new UserException.MalformedFile(featureIntervals.getSource(), "Problem reading the interval file", e);
}

View File

@ -27,13 +27,12 @@ package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.GATKChunk;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileSpan;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -265,7 +264,10 @@ public class BAMScheduler implements Iterator<FilePointer> {
// Naive algorithm: find all elements in current contig for proper schedule creation.
List<GenomeLoc> lociInContig = new LinkedList<GenomeLoc>();
for(GenomeLoc locus: loci) {
if(!GenomeLoc.isUnmapped(locus) && dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded)
if (!GenomeLoc.isUnmapped(locus) && dataSource.getHeader().getSequence(locus.getContig()) == null)
throw new ReviewedStingException("BAM file(s) do not have the contig: " + locus.getContig() + ". You are probably using a different reference than the one this file was aligned with");
if (!GenomeLoc.isUnmapped(locus) && dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded)
lociInContig.add(locus);
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
@ -16,4 +17,18 @@ public abstract class ReadFilter implements SamRecordFilter {
* @param engine the engine.
*/
public void initialize(GenomeAnalysisEngine engine) {}
/**
* Determines whether a pair of SAMRecords matches this filter
*
* @param first the first SAMRecord to evaluate
* @param second the second SAMRecord to evaluate
*
* @return true if the SAMRecords match the filter, otherwise false
* @throws UnsupportedOperationException when paired filter not implemented
*/
public boolean filterOut(final SAMRecord first, final SAMRecord second) {
throw new UnsupportedOperationException("Paired filter not implemented: " + this.getClass());
}
}

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.io.storage;
import net.sf.samtools.util.BlockCompressedOutputStream;
import org.apache.log4j.Logger;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.AbstractFeatureReader;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
@ -114,7 +114,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
String sourceFilePath = file.getAbsolutePath();
String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin";
logger.debug(String.format("Merging %s into %s",sourceFilePath,targetFilePath));
BasicFeatureSource<VariantContext> source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false);
AbstractFeatureReader<VariantContext> source = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
for ( VariantContext vc : source.iterator() ) {
target.writer.add(vc);

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
* A HACK. Tribble should contain all the information it needs to decode the unqualified position of
* a feature.
*/
public interface ReferenceDependentFeatureCodec<T extends org.broad.tribble.Feature> extends FeatureCodec<T> {
public interface ReferenceDependentFeatureCodec {
/**
* Sets the appropriate GenomeLocParser, providing additional context when decoding larger and more variable features.
* @param genomeLocParser The parser to supply.

View File

@ -26,11 +26,10 @@ package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.CloseableIterator;
import org.apache.log4j.Logger;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.CloseableTribbleIterator;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.iterators.CloseableTribbleIterator;
import org.broad.tribble.source.PerformanceLoggingFeatureSource;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -57,7 +56,7 @@ public class RMDTrack {
private final File file; // the associated file we create the reader from
// our feature reader - allows queries
private FeatureSource reader;
private AbstractFeatureReader reader;
// our sequence dictionary, which can be null
private final SAMSequenceDictionary dictionary;
@ -92,7 +91,7 @@ public class RMDTrack {
* @param dict the sam sequence dictionary
* @param codec the feature codec we use to decode this type
*/
public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) {
public RMDTrack(Class type, String name, File file, AbstractFeatureReader reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) {
this.type = type;
this.name = name;
this.file = file;
@ -116,8 +115,6 @@ public class RMDTrack {
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
CloseableTribbleIterator<Feature> iter = reader.query(interval.getContig(),interval.getStart(),interval.getStop());
if ( RMDTrackBuilder.MEASURE_TRIBBLE_QUERY_PERFORMANCE )
logger.warn("Query " + getName() + ":" + ((PerformanceLoggingFeatureSource)reader).getPerformanceLog());
return new FeatureToGATKFeatureIterator(genomeLocParser, iter, this.getName());
}
@ -130,10 +127,6 @@ public class RMDTrack {
reader = null;
}
public FeatureSource getReader() {
return reader;
}
/**
* get the sequence dictionary from the track, if available
* @return a SAMSequenceDictionary if available, null if unavailable

View File

@ -26,14 +26,12 @@ package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.Tribble;
import org.broad.tribble.TribbleException;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.source.PerformanceLoggingFeatureSource;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
@ -121,7 +119,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
throw new UserException.BadArgumentValue("-B",fileDescriptor.getType());
// return a feature reader track
Pair<FeatureSource, SAMSequenceDictionary> pair;
Pair<AbstractFeatureReader, SAMSequenceDictionary> pair;
if (inputFile.getAbsolutePath().endsWith(".gz"))
pair = createTabixIndexedFeatureSource(descriptor, name, inputFile);
else
@ -155,11 +153,11 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
* @param inputFile the file to load
* @return a feature reader implementation
*/
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) {
private Pair<AbstractFeatureReader, SAMSequenceDictionary> createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) {
// we might not know the index type, try loading with the default reader constructor
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
try {
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null);
return new Pair<AbstractFeatureReader, SAMSequenceDictionary>(AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null);
} catch (TribbleException e) {
throw new UserException(e.getMessage(), e);
}
@ -183,12 +181,12 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
* @param storageType How the RMD is streamed into the input file.
* @return the input file as a FeatureReader
*/
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(FeatureManager.FeatureDescriptor descriptor,
private Pair<AbstractFeatureReader, SAMSequenceDictionary> getFeatureSource(FeatureManager.FeatureDescriptor descriptor,
String name,
File inputFile,
RMDStorageType storageType) {
// Feature source and sequence dictionary to use as the ultimate reference
FeatureSource featureSource = null;
AbstractFeatureReader featureSource = null;
SAMSequenceDictionary sequenceDictionary = null;
// Detect whether or not this source should be indexed.
@ -215,10 +213,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
}
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
featureSource = new PerformanceLoggingFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
else
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name), index);
}
catch (TribbleException e) {
throw new UserException(e.getMessage());
@ -228,10 +223,10 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
}
}
else {
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(descriptor, name),false);
featureSource = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), createCodec(descriptor, name), false);
}
return new Pair<FeatureSource,SAMSequenceDictionary>(featureSource,sequenceDictionary);
return new Pair<AbstractFeatureReader,SAMSequenceDictionary>(featureSource,sequenceDictionary);
}
/**
@ -358,7 +353,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
private Index createIndexInMemory(File inputFile, FeatureCodec codec) {
// this can take a while, let them know what we're doing
logger.info("Creating Tribble index in memory for file " + inputFile);
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
Index idx = IndexFactory.createDynamicIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict);
return idx;
}

View File

@ -24,8 +24,8 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.CloseableTribbleIterator;
import org.broad.tribble.Feature;
import org.broad.tribble.iterators.CloseableTribbleIterator;
import org.broadinstitute.sting.utils.GenomeLocParser;

View File

@ -54,8 +54,8 @@ import java.util.*;
*/
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
public static final VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
@ -16,10 +17,7 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
@ -33,8 +31,12 @@ import java.util.Map;
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private static final int MIN_SAMPLES = 10;
private Set<String> founderIds;
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
//If available, get the founder IDs and cache them. the IC will only be computed on founders then.
if(founderIds == null)
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
return calculateIC(vc);
}
@ -43,7 +45,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
}
private Map<String, Object> calculateIC(final VariantContext vc) {
final GenotypesContext genotypes = vc.getGenotypes();
final GenotypesContext genotypes = (founderIds == null || founderIds.isEmpty()) ? vc.getGenotypes() : vc.getGenotypes(founderIds);
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
return null;

View File

@ -29,7 +29,7 @@ import java.util.Map;
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
if ( !vc.hasLog10PError() || stratifiedContexts.size() == 0 )
return null;
final GenotypesContext genotypes = vc.getGenotypes();

View File

@ -64,20 +64,20 @@ public class ContextCovariate implements StandardCovariate {
}
@Override
public CovariateValues getValues(GATKSAMRecord read) {
public CovariateValues getValues(final GATKSAMRecord read) {
int l = read.getReadLength();
BitSet[] mismatches = new BitSet[l];
BitSet[] insertions = new BitSet[l];
BitSet[] deletions = new BitSet[l];
read = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
final boolean negativeStrand = read.getReadNegativeStrandFlag();
byte[] bases = read.getReadBases();
final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
byte[] bases = clippedRead.getReadBases();
if (negativeStrand)
bases = BaseUtils.simpleReverseComplement(bases);
for (int i = 0; i < read.getReadLength(); i++) {
for (int i = 0; i < clippedRead.getReadLength(); i++) {
mismatches[i] = contextWith(bases, i, mismatchesContextSize);
insertions[i] = contextWith(bases, i, insertionsContextSize);
deletions[i] = contextWith(bases, i, deletionsContextSize);

View File

@ -25,15 +25,16 @@
package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.apache.log4j.Logger;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.*;
import java.util.Iterator;
import java.util.Map;
@ -56,16 +57,14 @@ public class VCFDiffableReader implements DiffableReader {
DiffNode root = DiffNode.rooted(file.getName());
try {
// read the version line from the file
LineReader lineReader = new AsciiLineReader(new FileInputStream(file));
final String version = lineReader.readLine();
BufferedReader br = new BufferedReader(new FileReader(file));
final String version = br.readLine();
root.add("VERSION", version);
lineReader.close();
lineReader = new AsciiLineReader(new FileInputStream(file));
VCFCodec vcfCodec = new VCFCodec();
br.close();
// must be read as state is stored in reader itself
VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
VCFHeader header = (VCFHeader)reader.getHeader();
for ( VCFHeaderLine headerLine : header.getMetaData() ) {
String key = headerLine.getKey();
if ( headerLine instanceof VCFIDHeaderLine)
@ -76,14 +75,14 @@ public class VCFDiffableReader implements DiffableReader {
root.add(key, headerLine.toString());
}
String line = lineReader.readLine();
int count = 0, nRecordsAtPos = 1;
String prevName = "";
while ( line != null ) {
Iterator<VariantContext> it = reader.iterator();
while ( it.hasNext() ) {
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
break;
VariantContext vc = (VariantContext)vcfCodec.decode(line);
VariantContext vc = it.next();
String name = vc.getChr() + ":" + vc.getStart();
if ( name.equals(prevName) ) {
name += "_" + ++nRecordsAtPos;
@ -121,10 +120,9 @@ public class VCFDiffableReader implements DiffableReader {
}
root.add(vcRoot);
line = lineReader.readLine();
}
lineReader.close();
reader.close();
} catch ( IOException e ) {
return null;
}

View File

@ -82,6 +82,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
@Argument(shortName = "prior",required = false,fullName = "DeNovoPrior", doc="Prior for de novo mutations. Default: 1e-8")
private double deNovoPrior=1e-8;
@Argument(shortName = "fatherAlleleFirst",required = false,fullName = "FatherAlleleFirst", doc="Outputs the father allele as the first allele in phased child genotype. i.e. father|mother rather than mother|father.")
private boolean fatherFAlleleFirst=false;
@Output
protected VCFWriter vcfWriter = null;
@ -183,12 +186,15 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
ArrayList<Allele> parentPhasedAlleles = new ArrayList<Allele>(2);
ArrayList<Allele> childPhasedAlleles = new ArrayList<Allele>(2);
//If there is a possible phasing between the mother and child => phase
//If there is a possible phasing between the parent and child => phase
int childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(0));
if(childTransmittedAlleleIndex > -1){
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
childPhasedAlleles.add(childAlleles.get(0));
if(parent.equals(FamilyMember.MOTHER))
childPhasedAlleles.add(childAlleles.get(0));
else
childPhasedAlleles.add(0,childAlleles.get(0));
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
}
else if((childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(1))) > -1){
@ -196,7 +202,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
parentPhasedAlleles.add(parentAlleles.get(0));
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
childPhasedAlleles.add(childAlleles.get(0));
if(parent.equals(FamilyMember.MOTHER))
childPhasedAlleles.add(childAlleles.get(0));
else
childPhasedAlleles.add(0,childAlleles.get(0));
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
}
//This is a Mendelian Violation => Do not phase
@ -296,6 +305,14 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
else{
phaseFamilyAlleles(mother, father, child);
}
//If the child's phased genotype should be father-first, then swap the alleles
if(fatherFAlleleFirst && trioPhasedGenotypes.get(FamilyMember.CHILD).isPhased()){
ArrayList<Allele> childAlleles = new ArrayList<Allele>(trioPhasedGenotypes.get(FamilyMember.CHILD).getAlleles());
childAlleles.add(childAlleles.remove(0));
trioPhasedGenotypes.put(FamilyMember.CHILD,new Genotype(DUMMY_NAME,childAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
}
}
/**

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
@ -189,6 +190,8 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
if ( SET_KEY != null )
headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants"));
if ( !ASSUME_IDENTICAL_SAMPLES )
headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions));
VCFHeader vcfHeader = new VCFHeader(headerLines, sitesOnlyVCF ? Collections.<String>emptySet() : samples);
vcfHeader.setWriteCommandLine(!SUPPRESS_COMMAND_LINE_HEADER);
vcfWriter.writeHeader(vcfHeader);

View File

@ -25,16 +25,14 @@ package org.broadinstitute.sting.utils.codecs.beagle;
*/
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
@ -63,7 +61,7 @@ import java.util.regex.Pattern;
* @author Mark DePristo
* @since 2010
*/
public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature> {
public class BeagleCodec extends AsciiFeatureCodec<BeagleFeature> implements ReferenceDependentFeatureCodec {
private String[] header;
public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2};
private BeagleReaderType readerType;
@ -80,25 +78,16 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature
*/
private GenomeLocParser genomeLocParser;
public BeagleCodec() {
super(BeagleFeature.class);
}
/**
* Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser.
*/
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser;
}
public Feature decodeLoc(String line) {
return decode(line);
}
public static String[] readHeader(final File source) throws IOException {
FileInputStream is = new FileInputStream(source);
try {
return readHeader(new AsciiLineReader(is), null);
} finally {
is.close();
}
}
public Object readHeader(LineReader reader)
@ -183,11 +172,6 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature
private static Pattern MARKER_PATTERN = Pattern.compile("(.+):([0-9]+)");
@Override
public Class<BeagleFeature> getFeatureType() {
return BeagleFeature.class;
}
public BeagleFeature decode(String line) {
String[] tokens;

View File

@ -24,8 +24,7 @@
package org.broadinstitute.sting.utils.codecs.hapmap;
import org.broad.tribble.AbstractFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.annotation.Strand;
import org.broad.tribble.readers.LineReader;
@ -71,18 +70,14 @@ import java.util.Arrays;
* @author Mark DePristo
* @since 2010
*/
public class RawHapMapCodec extends AbstractFeatureCodec {
public class RawHapMapCodec extends AsciiFeatureCodec<RawHapMapFeature> {
// the minimum number of features in the HapMap file line
private static final int minimumFeatureCount = 11;
private String headerLine;
/**
* decode the location only
* @param line the input line to decode
* @return a HapMapFeature
*/
public Feature decodeLoc(String line) {
return decode(line);
public RawHapMapCodec() {
super(RawHapMapFeature.class);
}
/**
@ -90,7 +85,7 @@ public class RawHapMapCodec extends AbstractFeatureCodec {
* @param line the input line to decode
* @return a HapMapFeature, with the given fields
*/
public Feature decode(String line) {
public RawHapMapFeature decode(String line) {
String[] array = line.split("\\s+");
// make sure the split was successful - that we got an appropriate number of fields
@ -113,10 +108,6 @@ public class RawHapMapCodec extends AbstractFeatureCodec {
headerLine);
}
public Class<RawHapMapFeature> getFeatureType() {
return RawHapMapFeature.class;
}
public Object readHeader(LineReader reader) {
try {
headerLine = reader.readLine();

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.utils.codecs.refseq;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -46,13 +46,18 @@ import java.util.ArrayList;
* @author Mark DePristo
* @since 2010
*/
public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature> {
public class RefSeqCodec extends AsciiFeatureCodec<RefSeqFeature> implements ReferenceDependentFeatureCodec {
/**
* The parser to use when resolving genome-wide locations.
*/
private GenomeLocParser genomeLocParser;
private boolean zero_coding_length_user_warned = false;
public RefSeqCodec() {
super(RefSeqFeature.class);
}
/**
* Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser.
@ -130,17 +135,4 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature
feature.setExon_frames(exon_frames);
return feature;
}
@Override
public Object readHeader(LineReader reader) {
return null;
}
@Override
public Class<RefSeqFeature> getFeatureType() {
return RefSeqFeature.class;
}
public boolean canDecode(final String potentialInput) { return false; }
}

View File

@ -25,10 +25,9 @@
package org.broadinstitute.sting.utils.codecs.sampileup;
import org.broad.tribble.AbstractFeatureCodec;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils;
import java.util.ArrayList;
@ -76,7 +75,7 @@ import static org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature.V
* @author Matt Hanna
* @since 2009
*/
public class SAMPileupCodec extends AbstractFeatureCodec<SAMPileupFeature> {
public class SAMPileupCodec extends AsciiFeatureCodec<SAMPileupFeature> {
// the number of tokens we expect to parse from a pileup line
private static final int expectedTokenCount = 10;
private static final char fldDelim = '\t';
@ -88,24 +87,8 @@ public class SAMPileupCodec extends AbstractFeatureCodec<SAMPileupFeature> {
private static final String baseT = "T";
private static final String emptyStr = ""; // we will use this for "reference" allele in insertions
/**
* Return the # of header lines for this file.
*
* @param reader the line reader
* @return 0 in this case, we assume no header lines.
*/
public Object readHeader(LineReader reader) {
// we don't require a header line, but it may exist. We'll deal with that above.
return null;
}
@Override
public Class<SAMPileupFeature> getFeatureType() {
return SAMPileupFeature.class;
}
public Feature decodeLoc(String line) {
return decode(line);
public SAMPileupCodec() {
super(SAMPileupFeature.class);
}
public SAMPileupFeature decode(String line) {
@ -285,5 +268,4 @@ public class SAMPileupCodec extends AbstractFeatureCodec<SAMPileupFeature> {
feature.setPileupBases(baseBuilder.toString());
feature.setPileupQuals(qualBuilder.toString());
}
}

View File

@ -27,10 +27,9 @@ package org.broadinstitute.sting.utils.codecs.samread;
import net.sf.samtools.Cigar;
import net.sf.samtools.TextCigarCodec;
import net.sf.samtools.util.StringUtil;
import org.broad.tribble.AbstractFeatureCodec;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.ParsingUtils;
/**
@ -52,31 +51,14 @@ import org.broad.tribble.util.ParsingUtils;
* @author Matt Hanna
* @since 2009
*/
public class SAMReadCodec extends AbstractFeatureCodec<SAMReadFeature> {
public class SAMReadCodec extends AsciiFeatureCodec<SAMReadFeature> {
/* SL-XBC:1:10:628:923#0 16 Escherichia_coli_K12 1 37 76M = 1 0 AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB */
// the number of tokens we expect to parse from a read line
private static final int expectedTokenCount = 11;
/**
* Return the # of header lines for this file.
*
* @param reader the line reader
* @return 0 in this case, we assume no header lines. The reads file may have a
* header line beginning with '@', but we can ignore that in the decode function.
*/
public Object readHeader(LineReader reader) {
// we don't require a header line, but it may exist. We'll deal with that above.
return null;
}
@Override
public Class<SAMReadFeature> getFeatureType() {
return SAMReadFeature.class;
}
public Feature decodeLoc(String line) {
return decode(line);
public SAMReadCodec() {
super(SAMReadFeature.class);
}
/**
@ -131,6 +113,4 @@ public class SAMReadCodec extends AbstractFeatureCodec<SAMReadFeature> {
bases,
qualities);
}
}

View File

@ -23,7 +23,7 @@ import java.util.Arrays;
public class BedTableCodec extends TableCodec implements ReferenceDependentFeatureCodec {
@Override
public Feature decode(String line) {
public TableFeature decode(String line) {
if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter) || line.startsWith(igvHeaderDelimiter))
return null;
String[] split = line.split(delimiterRegex);

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.utils.codecs.table;
import org.broad.tribble.Feature;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -39,7 +39,7 @@ import java.util.Arrays;
* @author Mark DePristo
* @since 2009
*/
public class TableCodec implements ReferenceDependentFeatureCodec {
public class TableCodec extends AsciiFeatureCodec<TableFeature> implements ReferenceDependentFeatureCodec {
final static protected String delimiterRegex = "\\s+";
final static protected String headerDelimiter = "HEADER";
final static protected String igvHeaderDelimiter = "track";
@ -52,6 +52,10 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
*/
protected GenomeLocParser genomeLocParser;
public TableCodec() {
super(TableFeature.class);
}
/**
* Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser.
@ -61,14 +65,8 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
this.genomeLocParser = genomeLocParser;
}
@Override
public Feature decodeLoc(String line) {
return decode(line);
}
@Override
public Feature decode(String line) {
public TableFeature decode(String line) {
if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter) || line.startsWith(igvHeaderDelimiter))
return null;
String[] split = line.split(delimiterRegex);
@ -77,11 +75,6 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
return new TableFeature(genomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header);
}
@Override
public Class<TableFeature> getFeatureType() {
return TableFeature.class;
}
@Override
public Object readHeader(LineReader reader) {
String line = "";
@ -106,7 +99,4 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
}
return header;
}
public boolean canDecode(final String potentialInput) { return false; }
}

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import org.apache.log4j.Logger;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.NameAwareCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
@ -10,14 +10,20 @@ import org.broad.tribble.util.BlockCompressedInputStream;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.LazyGenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import java.io.*;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.zip.GZIPInputStream;
public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext> implements NameAwareCodec {
public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
protected final static Logger log = Logger.getLogger(VCFCodec.class);
@ -61,6 +67,10 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
protected Map<String, String> stringCache = new HashMap<String, String>();
protected AbstractVCFCodec() {
super(VariantContext.class);
}
/**
* Creates a LazyParser for a LazyGenotypesContext to use to decode
* our genotypes only when necessary. We do this instead of eagarly
@ -266,7 +276,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
* @param line the line
* @return a VariantContext
*/
public Feature decode(String line) {
public VariantContext decode(String line) {
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
@ -378,14 +388,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
return vc;
}
/**
*
* @return the type of record
*/
public Class<VariantContext> getFeatureType() {
return VariantContext.class;
}
/**
* get the name of this codec
* @return our set name

View File

@ -28,12 +28,10 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.Tribble;
import org.broad.tribble.TribbleException;
import org.broad.tribble.index.DynamicIndexCreator;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broad.tribble.util.PositionalStream;
import org.broadinstitute.sting.gatk.refdata.tracks.IndexDictionaryUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -142,3 +140,31 @@ public abstract class IndexingVCFWriter implements VCFWriter {
}
}
}
class PositionalStream extends OutputStream {
OutputStream out = null;
private long position = 0;
public PositionalStream(OutputStream out) {
this.out = out;
}
public void write(final byte[] bytes) throws IOException {
write(bytes, 0, bytes.length);
}
public void write(final byte[] bytes, int startIndex, int numBytes) throws IOException {
//System.out.println("write: " + bytes + " " + numBytes);
position += numBytes;
out.write(bytes, startIndex, numBytes);
}
public void write(int c) throws IOException {
System.out.println("write byte: " + c);
//System.out.printf("Position %d for %c\n", position, (char)c);
position++;
out.write(c);
}
public long getPosition() { return position; }
}

View File

@ -108,12 +108,17 @@ public class StandardVCFWriter extends IndexingVCFWriter {
// write out the column line
mWriter.write(VCFHeader.HEADER_INDICATOR);
boolean isFirst = true;
for ( VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields() ) {
if ( isFirst )
isFirst = false; // don't write out a field separator
else
mWriter.write(VCFConstants.FIELD_SEPARATOR);
mWriter.write(field.toString());
mWriter.write(VCFConstants.FIELD_SEPARATOR);
}
if ( mHeader.hasGenotypingData() ) {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
mWriter.write("FORMAT");
for ( String sample : mHeader.getGenotypeSamples() ) {
mWriter.write(VCFConstants.FIELD_SEPARATOR);

View File

@ -24,7 +24,7 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
* @param line the header line
* @param version the vcf header version
*/
protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
public VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, "FILTER", Arrays.asList("ID", "Description"));
}
}

View File

@ -20,7 +20,7 @@ public class VCFFormatHeaderLine extends VCFCompoundHeaderLine {
super(name, count, type, description, SupportedHeaderLineType.FORMAT);
}
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
public VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.FORMAT);
}

View File

@ -112,7 +112,7 @@ public class VCFHeader {
* @param genotypeSampleNamesInAppearenceOrder genotype sample names
*/
protected void buildVCFReaderMaps(List<String> genotypeSampleNamesInAppearenceOrder) {
public void buildVCFReaderMaps(List<String> genotypeSampleNamesInAppearenceOrder) {
sampleNamesInOrder = new ArrayList<String>(genotypeSampleNamesInAppearenceOrder.size());
sampleNameToOffset = new HashMap<String, Integer>(genotypeSampleNamesInAppearenceOrder.size());

View File

@ -17,7 +17,7 @@ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine {
super(name, count, type, description, SupportedHeaderLineType.INFO);
}
protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
public VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.INFO);
}

View File

@ -48,7 +48,7 @@ public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLin
* @param key the key for this header line
* @param expectedTagOrdering the tag ordering expected for this header line
*/
protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, String key, List<String> expectedTagOrdering) {
public VCFSimpleHeaderLine(String line, VCFHeaderVersion version, String key, List<String> expectedTagOrdering) {
super(key, "");
Map<String, String> mapping = VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering);
name = mapping.get("ID");

View File

@ -76,7 +76,7 @@ public class WalkerTest extends BaseTest {
public static void assertOnDiskIndexEqualToNewlyCreatedIndex(final File indexFile, final String name, final File resultFile) {
System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile);
Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec());
Index indexFromOutputFile = IndexFactory.createDynamicIndex(resultFile, new VCFCodec());
Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath());
if ( ! indexFromOutputFile.equalsIgnoreProperties(dynamicIndex) ) {

View File

@ -24,7 +24,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(b36KGReference, "symbolic_alleles_1.vcf"),
1,
Arrays.asList("89a1c56f264ac27a2a4be81072473b6f"));
Arrays.asList("444a20659f67592a8284e0b7849e4302"));
executeTest("Test symbolic alleles", spec);
}
@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),
1,
Arrays.asList("3008d6f5044bc14801e5c58d985dec72"));
Arrays.asList("93a24c019663a6011b4d6de12538df11"));
executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec);
}
}

View File

@ -54,7 +54,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testNoAnnotsNotAsking2() {
// this genotype annotations in this file are actually out of order. If you don't parse the genotypes
// the genotype annotations in this file are actually out of order. If you don't parse the genotypes
// they don't get reordered. It's a good test of the genotype ordering system.
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
@ -126,6 +126,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
executeTest("getting DB tag with HM3", spec);
}
@Test
public void testNoQuals() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant " + validationDataLocation + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + validationDataLocation + "noQual.vcf -A QualByDepth", 1,
Arrays.asList("e531c9f90c17f0f859cd1ac851a8edd8"));
executeTest("test file doesn't have QUALs", spec);
}
@Test
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
@ -144,7 +152,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testTabixAnnotations() {
final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf";
final String MD5 = "bb9a148716fc69d706c5be146c1afa00";
for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -A HomopolymerRun --variant:vcf " + validationDataLocation + file + " -L " + validationDataLocation + "CEU.exon.2010_03.sites.vcf -NO_HEADER", 1,
@ -198,4 +206,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
executeTest("Testing ChromosomeCounts annotation with PED file", spec);
}
@Test
public void testInbreedingCoeffPed() {
final String MD5 = "7f1314fada5cb1f35ba1996f8a7a686b";
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
" -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,
Arrays.asList(MD5));
executeTest("Testing InbreedingCoeff annotation with PED file", spec);
}
}

View File

@ -172,7 +172,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
e.put( "-sites_only", "446ed9a72d210671ed48aa1f572b77e3" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "ecf92054c1e4bd9d6529b8002d385165" );
e.put( "--output_mode EMIT_ALL_SITES", "e10819a2a7960254e27ed2b958b45d56" );
@ -376,7 +376,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("973178b97efd2daacc9e45c414275d59"));
Arrays.asList("1e4595c85159bd9b62eff575134b5dd9"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -384,7 +384,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("220facd2eb0923515d1d8ab874055564"));
Arrays.asList("6d9dbb949626fdb0bca6a7904e9b0c1f"));
executeTest("test minIndelFraction 0.25", spec);
}
@ -392,7 +392,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction100() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 1", 1,
Arrays.asList("50fe9a4c5633f6395b45d9ec1e00d56a"));
Arrays.asList("a31a355bc2b8257b2a45494e97322694"));
executeTest("test minIndelFraction 1.0", spec);
}
}

View File

@ -9,13 +9,13 @@ import java.util.Map;
public class IndelRealignerIntegrationTest extends WalkerTest {
private static final String mainTestBam = validationDataLocation + "indelRealignerTest.pilot1.ceu.fixed.bam";
private static final String mainTestBam = validationDataLocation + "indelRealignerTest.pilot1.ceu.fixed.fixmates.bam";
private static final String mainTestIntervals = validationDataLocation + "indelRealignerTest.pilot1.ceu.intervals";
private static final String knownIndels = validationDataLocation + "indelRealignerTest.pilot1.ceu.vcf";
private static final String baseCommandPrefix = "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + mainTestBam + " -targetIntervals " + mainTestIntervals + " -compress 0 -L 20:49,500-55,500 ";
private static final String baseCommand = baseCommandPrefix + "-o %s ";
private static final String base_md5 = "e041186bca9dccf360747c89be8417ad";
private static final String base_md5_with_SW_or_VCF = "d7c7acd346ee4c8d34f1e2499ff7c313";
private static final String base_md5 = "7574ab7d0b1ee5d44a0b3f85b6e944e6";
private static final String base_md5_with_SW_or_VCF = "a918d69d26d3c87b29002ed31f428c48";
@Test
public void testDefaults() {
@ -38,7 +38,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -known " + knownIndels,
1,
Arrays.asList("3dd5d2c9931b375455af0bff1a2c4888"));
Arrays.asList("36718f10d523dfb0fa2a709480f24bd4"));
executeTest("realigner known indels only from VCF", spec1);
}
@ -55,7 +55,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
public void testLods() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-LOD 60", base_md5 );
e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "f158f18198cf48bbb3d4f1b7127928a3" );
e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "9a75a0f7ad0442c78d0f8df260e733a4" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -71,7 +71,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T IndelRealigner -noPG -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10,000,000-11,000,000 -targetIntervals " + validationDataLocation + "indelRealignerTest.NA12878.chrom1.intervals -compress 0 -o %s",
1,
Arrays.asList("fe39c007d287d372a8137d11c60fbc50"));
Arrays.asList("e98f51d71f0a82141b36a7e9f94db237"));
executeTest("realigner long run", spec);
}
@ -80,7 +80,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseCommand + "--noOriginalAlignmentTags --consensusDeterminationModel USE_SW",
1,
Arrays.asList("e77e59cc6363cf58f392ce5ea8d7e0b6"));
Arrays.asList("58ac675d0699eb236d469b8e84513d11"));
executeTest("realigner no output tags", spec);
}

View File

@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("16fefda693156eadf1481fd9de23facb","9418a7a6405b78179ca13a67b8bfcc14")
Arrays.asList("d54a142d68dca54e478c13f9a0e4c95c","1a37fcc93a73429f9065b942ab771233")
);
executeTest("testTrueNegativeMV", spec);
}
@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("14cf1d21a54d8b9fb506df178b634c56","efc66ae3d036715b721f9bd35b65d556")
Arrays.asList("883ea7fd2b200c4b7fa95a4f7aa15931","7b1f5309c3d4f4aa7e9061f288dceb68")
);
executeTest("testTruePositiveMV", spec);
}
@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("f9b0fae9fe1e0f09b883a292b0e70a12","398724bc1e65314cc5ee92706e05a3ee")
Arrays.asList("e812d62a3449b74b6948ee7deb8a0790","d00922496759e84c66a4b5e222e36997")
);
executeTest("testFalsePositiveMV", spec);
}
@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("b8d1aa3789ce77b45430c62d13ee3006","a1a333e08fafb288cda0e7711909e1c3")
Arrays.asList("e3c572f933a40e1878a2cfa52049517a","60e4f0be344fb944ab3378f9ab27da64")
);
executeTest("testSpecialCases", spec);
}
@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("7201ce7cc47db5840ac6b647709f7c33","c11b5e7cd7459d90d0160f917eff3b1e")
Arrays.asList("b42af3b73a2cb38cfc92f8047dd686b3","a69c3f9c005e852b44c29ab25e87ba0d")
);
executeTest("testPriorOption", spec);
}
@ -128,9 +128,30 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("398724bc1e65314cc5ee92706e05a3ee")
Arrays.asList("d00922496759e84c66a4b5e222e36997")
);
executeTest("testMVFileOption", spec);
}
//Test when running with the fatherAlleleFirst option
@Test
public void testFatherAlleleFirst() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + TPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s",
"-fatherAlleleFirst"
),
2,
Arrays.asList("c158a3816357597543ef85c4478c41e8","4f8daca19c8f31bd87850c124f91e330")
);
executeTest("testFatherAlleleFirst", spec);
}
}

View File

@ -33,7 +33,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(sampleNone + freqUnif + "--variant " + testfile),
1,
Arrays.asList("d49baeb8000a426c172ce1d81eb37963")
Arrays.asList("6a9e990a9252840904b5144213915b32")
);
executeTest("testNoSampleSelectionFreqUniform--" + testfile, spec);
@ -45,7 +45,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(sampleNone + freqAF + "--variant " + testfile),
1,
Arrays.asList("0fb0d015d462c34514fc7e96beea5f56")
Arrays.asList("eaa2385086cddff68cf4fdb81cbdbbb9")
);
executeTest("testNoSampleSelectionFreqAF--" + testfile, spec);
@ -57,7 +57,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(sampleGT + freqUnif + "--variant " + testfile),
1,
Arrays.asList("0672854299d42ea8af906976a3849ae6")
Arrays.asList("24077656f590d6905546f7e019c8dccb")
);
executeTest("testPolyGTFreqUniform--" + testfile, spec);
@ -69,7 +69,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(sampleGT + freqAF + "--variant " + testfile),
1,
Arrays.asList("5bdffda1a063d0bddd6b236854ec627d")
Arrays.asList("3c1180fd9b5e80e540b39c5a95fbe722")
);
executeTest("testPolyGTFreqAF--" + testfile, spec);
@ -81,7 +81,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(sampleGL + freqAF + "--variant " + testfile),
1,
Arrays.asList("35ef16aa41303606a4b94f7b88bd9aa8")
Arrays.asList("ad30c028864348204ebe80b9c8c503e8")
);
executeTest("testPolyGLFreqAF--" + testfile, spec);

View File

@ -27,8 +27,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
"f8e21a1987960b950db1f0d98be45352", // recal file
"f67d844b6252a55452cf4167b77530b1"); // cut VCF
"a45a78de049cfe767ce23d3423f80b01", // recal file
"1050c387d170639f8cec221e5dddd626"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {
@ -74,8 +74,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
"da4458d05f6396f5c4ab96f274e5ccdc", // tranches
"cf380d9b0ae04c8918be8425f82035b4", // recal file
"b00e5e5a6807df8ed1682317948e8a6d"); // cut VCF
"918a5ecad5a2a8a46795144366683188", // recal file
"bf0e8ed5e250d52f0545074c61217d16"); // cut VCF
@DataProvider(name = "VRIndelTest")
public Object[][] createData2() {
@ -131,7 +131,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -o %s" +
" -tranchesFile " + validationDataLocation + "VQSR.mixedTest.tranches" +
" -recalFile " + validationDataLocation + "VQSR.mixedTest.recal",
Arrays.asList("08060b7f5c9cf3bb1692b50c58fd5a4b"));
Arrays.asList("9039576b63728df7ee2c881817c0e9eb"));
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
}
}

View File

@ -78,26 +78,26 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
}
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ea0a660cd04101ce7b534aba0310721d"); }
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "cb0350e7a9d2483993482b69f5432b64", " -setKey foo"); }
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "0571c48cc59cf244779caae52d562e79", " -setKey null"); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0"); } // official project VCF files in tabix format
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c1e82f0842ca721d10f21604f26a5248"); }
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "b2fcf3983cc9e667b9bbed8372080776", " -setKey foo"); }
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "98c0cbb94e5debf7545a656665a1b659", " -setKey null"); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "10170f9e72cc831a5820bd03e70fe46a"); } // official project VCF files in tabix format
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "75901304abc1daa41b1906f881aa7bbc"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e"); }
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "074e909f80ffcc9fddc3fac89ea36bef"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f26980af214011c0452b8ce843f3063b"); }
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "d08e933b6c81246e998d3ece50ddfdcc"); }
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "7c337c8752abeffb0c9a4ee35d1a1451"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "01967686e0e02dbccd2590b70f2d049b"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "8c113199c4a93a4a408104b735d59044"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "30e96a0cb614cd5bc056e1f7ec6d10bd"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "06e86711bcf0efc0f0c4a378f6147cf6"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "03103f6b39e9fb7a396df0013f01fae6"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "12fc1b8145f7884762f0c2cbbd319ae1"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "7e2dba80ba38b2a86713f635d630eb59"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "78a49597f1abf1c738e67d50c8fbed2b"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "63fc20d6223e1387563a1164987d716c"); }
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4c63bfa5f73793aaca42e130ec49f238"); }
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "86e326acbd8d2af8a6040eb146d92fc6"); }
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "5c60eb8d5d4b957a0cf52ca008f021ba"); }
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "774b43e69cc7ec93090b4f6e9f4a1079"); }
@Test public void threeWayWithRefs() {
WalkerTestSpec spec = new WalkerTestSpec(
@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
Arrays.asList("ee43a558fd3faeaa447acab89f0001d5"));
Arrays.asList("988f9d294a8ff4278e40e76a72200bf4"));
executeTest("threeWayWithRefs", spec);
}
@ -127,17 +127,17 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
}
@Test public void complexTestFull() { combineComplexSites("", "2842337e9943366f7a4d5f148f701b8c"); }
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "39724318e6265d0318a3fe4609612785"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "fe9bb02ab8b3d0dd2ad6373ebdb6d915"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "fe9bb02ab8b3d0dd2ad6373ebdb6d915"); }
@Test public void complexTestFull() { combineComplexSites("", "dd805f6edfc3cf724512dfbbe8df5183"); }
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "14a205edb022f79abf1863588cfee56b"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "e118d04d1d47c02ad38c046561a9f616"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "e118d04d1d47c02ad38c046561a9f616"); }
@Test
public void combineDBSNPDuplicateSites() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
1,
Arrays.asList("5969446769cb8377daa2db29304ae6b5"));
Arrays.asList("a838dc241cf357466cd4331fd298c73a"));
executeTest("combineDBSNPDuplicateSites:", spec);
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
@ -63,7 +64,7 @@ public class CombineVariantsUnitTest {
private VCFHeader createHeader(String headerStr) {
VCFCodec codec = new VCFCodec();
VCFHeader head = (VCFHeader)codec.readHeader(new AsciiLineReader(new StringBufferInputStream(headerStr)));
VCFHeader head = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr))));
return head;
}

View File

@ -40,7 +40,7 @@ public class LeftAlignVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LeftAlignVariants -o %s -R " + b37KGReference + " --variant:vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER",
1,
Arrays.asList("158b1d71b28c52e2789f164500b53732"));
Arrays.asList("8e0991576518823b339a4e2f83299d4f"));
executeTest("test left alignment", spec);
}
}

View File

@ -136,7 +136,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s -NO_HEADER",
1,
Arrays.asList("20b52c96f5c48258494d072752b53693")
Arrays.asList("ffa2524380d84a870d2e4a33d9f3d31a")
);
executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);

View File

@ -19,7 +19,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testVariantsToVCFUsingDbsnpInput() {
List<String> md5 = new ArrayList<String>();
md5.add("d64942fed2a5b7b407f9537dd2b4832e");
md5.add("a26afcce2a89f905a49c3d09719586b2");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.codecs.hapmap;
import org.broad.tribble.annotation.Strand;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.testng.Assert;
import org.testng.annotations.Test;
@ -152,7 +153,7 @@ public class HapMapUnitTest {
public AsciiLineReader getReader() {
try {
return new AsciiLineReader(new FileInputStream(hapMapFile));
return new AsciiLineReader(new PositionalBufferedStream(new FileInputStream(hapMapFile)));
} catch (FileNotFoundException e) {
Assert.fail("Unable to open hapmap file : " + hapMapFile);
}

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.CloseableTribbleIterator;
import org.broad.tribble.Tribble;
import org.broad.tribble.index.*;
import org.broad.tribble.iterators.CloseableTribbleIterator;
import org.broad.tribble.source.BasicFeatureSource;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -45,14 +45,14 @@ public class IndexFactoryUnitTest extends BaseTest {
//
@Test
public void testOnTheFlyIndexing1() throws IOException {
Index indexFromInputFile = IndexFactory.createIndex(inputFile, new VCFCodec());
Index indexFromInputFile = IndexFactory.createDynamicIndex(inputFile, new VCFCodec());
if ( outputFileIndex.exists() ) {
System.err.println("Deleting " + outputFileIndex);
outputFileIndex.delete();
}
for ( int maxRecords : Arrays.asList(0, 1, 10, 100, 1000, -1)) {
BasicFeatureSource<VariantContext> source = new BasicFeatureSource<VariantContext>(inputFile.getAbsolutePath(), indexFromInputFile, new VCFCodec());
AbstractFeatureReader<VariantContext> source = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), new VCFCodec(), indexFromInputFile);
int counter = 0;
VCFWriter writer = new StandardVCFWriter(outputFile, dict);
@ -66,7 +66,7 @@ public class IndexFactoryUnitTest extends BaseTest {
// test that the input index is the same as the one created from the identical input file
// test that the dynamic index is the same as the output index, which is equal to the input index
WalkerTest.assertOnDiskIndexEqualToNewlyCreatedIndex(outputFileIndex, "unittest", outputFile);
//WalkerTest.assertOnDiskIndexEqualToNewlyCreatedIndex(outputFileIndex, "unittest", outputFile);
}
}
}

View File

@ -35,7 +35,7 @@ public class VCFIntegrationTest extends WalkerTest {
String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("76075307afd26b4db6234795d9fb3c2f"));
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("acee3b6bdb4b759992f54065c675a249"));
executeTest("Test reading and writing breakpoint VCF", spec1);
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
@ -24,7 +25,7 @@ public class VCFHeaderUnitTest extends BaseTest {
private VCFHeader createHeader(String headerStr) {
VCFCodec codec = new VCFCodec();
VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new StringBufferInputStream(headerStr)));
VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr))));
Assert.assertEquals(header.getMetaData().size(), VCF4headerStringCount);
return header;
}

View File

@ -1,7 +1,10 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.Tribble;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -59,16 +62,10 @@ public class VCFWriterUnitTest extends BaseTest {
writer.add(createVC(header));
writer.add(createVC(header));
writer.close();
VCFCodec reader = new VCFCodec();
AsciiLineReader lineReader;
VCFCodec codec = new VCFCodec();
VCFHeader headerFromFile = null;
try {
lineReader = new AsciiLineReader(new FileInputStream(fakeVCFFile));
headerFromFile = (VCFHeader)reader.readHeader(lineReader);
}
catch (FileNotFoundException e ) {
throw new ReviewedStingException(e.getMessage());
}
FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(fakeVCFFile.getAbsolutePath(), codec, false);
headerFromFile = (VCFHeader)reader.getHeader();
int counter = 0;
@ -76,12 +73,9 @@ public class VCFWriterUnitTest extends BaseTest {
validateHeader(headerFromFile);
try {
while(true) {
String line = lineReader.readLine();
if (line == null)
break;
VariantContext vc = (VariantContext)reader.decode(line);
Iterator<VariantContext> it = reader.iterator();
while(it.hasNext()) {
VariantContext vc = it.next();
counter++;
}
Assert.assertEquals(counter, 2);

View File

@ -78,30 +78,31 @@ public class VariantContextBenchmark extends SimpleBenchmark {
private GenomeLocParser b37GenomeLocParser;
@Override protected void setUp() {
try {
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.b37KGReference));
b37GenomeLocParser = new GenomeLocParser(seq);
} catch ( FileNotFoundException e) {
throw new RuntimeException(e);
}
// read it into a String so that we don't try to benchmark IO issues
try {
FileInputStream s = new FileInputStream(new File(vcfFile));
AsciiLineReader lineReader = new AsciiLineReader(s);
int counter = 0;
StringBuffer sb = new StringBuffer();
while (counter++ < linesToRead ) {
String line = lineReader.readLine();
if ( line == null )
break;
sb.append(line + "\n");
}
s.close();
INPUT_STRING = sb.toString();
} catch (IOException e) {
throw new RuntimeException(e);
}
// TODO -- update for new tribble interface
// try {
// ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.b37KGReference));
// b37GenomeLocParser = new GenomeLocParser(seq);
// } catch ( FileNotFoundException e) {
// throw new RuntimeException(e);
// }
//
// // read it into a String so that we don't try to benchmark IO issues
// try {
// FileInputStream s = new FileInputStream(new File(vcfFile));
// AsciiLineReader lineReader = new AsciiLineReader(s);
// int counter = 0;
// StringBuffer sb = new StringBuffer();
// while (counter++ < linesToRead ) {
// String line = lineReader.readLine();
// if ( line == null )
// break;
// sb.append(line + "\n");
// }
// s.close();
// INPUT_STRING = sb.toString();
// } catch (IOException e) {
// throw new RuntimeException(e);
// }
}
private interface FunctionToBenchmark<T extends Feature> {
@ -109,23 +110,24 @@ public class VariantContextBenchmark extends SimpleBenchmark {
}
private <T extends Feature> void runBenchmark(FeatureCodec<T> codec, FunctionToBenchmark<T> func) {
try {
InputStream is = new ByteArrayInputStream(INPUT_STRING.getBytes());
AsciiLineReader lineReader = new AsciiLineReader(is);
codec.readHeader(lineReader);
int counter = 0;
while (counter++ < linesToRead ) {
String line = lineReader.readLine();
if ( line == null )
break;
T vc = codec.decode(line);
func.run(vc);
}
} catch (Exception e) {
System.out.println("Benchmarking run failure because of " + e.getMessage());
}
// TODO -- update for new Tribble interface
// try {
// InputStream is = new ByteArrayInputStream(INPUT_STRING.getBytes());
// AsciiLineReader lineReader = new AsciiLineReader(is);
// codec.readHeader(lineReader);
//
// int counter = 0;
// while (counter++ < linesToRead ) {
// String line = lineReader.readLine();
// if ( line == null )
// break;
//
// T vc = codec.decode(line);
// func.run(vc);
// }
// } catch (Exception e) {
// System.out.println("Benchmarking run failure because of " + e.getMessage());
// }
}
public void timeV14(int rep) {

View File

@ -4,14 +4,16 @@ import java.io.File
import org.apache.commons.io.FilenameUtils
import scala.io.Source._
import net.sf.samtools.SAMFileReader
import org.broad.tribble.source.BasicFeatureSource
import org.broadinstitute.sting.utils.codecs.vcf.{VCFHeader, VCFCodec}
import scala.collection.JavaConversions._
import org.broad.tribble.{FeatureCodec, AbstractFeatureReader}
import org.broadinstitute.sting.utils.variantcontext.VariantContext
object VCF_BAM_utilities {
def getSamplesFromVCF(vcfFile: File): List[String] = {
return BasicFeatureSource.getFeatureSource(vcfFile.getPath(), new VCFCodec()).getHeader().asInstanceOf[VCFHeader].getGenotypeSamples().toList
val codec: FeatureCodec[VariantContext] = new VCFCodec().asInstanceOf[FeatureCodec[VariantContext]]
AbstractFeatureReader.getFeatureReader(vcfFile.getPath, codec).getHeader.asInstanceOf[VCFHeader].getGenotypeSamples.toList
}
def getSamplesInBAM(bam: File): List[String] = {

View File

@ -42,7 +42,7 @@ class DataProcessingPipelineTest {
" -nv ",
" -test ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "1f85e76de760167a77ed1d9ab4da2936"
spec.fileMD5s += testOut -> "0de95b5642e41e11ecd6fa1770242b88"
PipelineTest.executeTest(spec)
}
@ -62,7 +62,7 @@ class DataProcessingPipelineTest {
" -bwa /home/unix/carneiro/bin/bwa",
" -bwape ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "57416a0abdf9524bc92834d466529708"
spec.fileMD5s += testOut -> "72beeb037bfc5a07599630a23d8b325b"
PipelineTest.executeTest(spec)
}

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="edu.mit.broad" module="picard-private-parts" revision="2181" status="integration" publication="20120110094400" />
<info organisation="edu.mit.broad" module="picard-private-parts" revision="2375" status="integration" publication="20120502094400" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="picard" revision="1.59.1066" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="picard" revision="1.67.1197" status="release" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="sam" revision="1.59.1066" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="sam" revision="1.67.1197" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="101" status="integration" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="53" status="integration" />
</ivy-module>