Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-07-10 12:53:00 -06:00
commit 75e5a50b8a
32 changed files with 179 additions and 83 deletions

View File

@ -154,6 +154,13 @@ public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, Red
@Argument(fullName = "dont_compress_read_names", shortName = "nocmp_names", doc = "", required = false)
protected boolean DONT_COMPRESS_READ_NAMES = false;
/**
* Optionally hard clip all incoming reads to the desired intervals. The hard clips will happen exactly at the interval
* border.
*/
@Argument(fullName = "hard_clip_to_interval", shortName = "clip_int", doc = "", required = false)
protected boolean HARD_CLIP_TO_INTERVAL = false;
/**
* Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be
* considered consensus.
@ -166,7 +173,7 @@ public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, Red
* considered consensus.
*/
@Argument(fullName = "minimum_del_proportion_to_trigger_variant", shortName = "mindel", doc = "", required = false)
protected double minIndelProportionToTriggerVariant = 0.01;
protected double minIndelProportionToTriggerVariant = 0.05;
/**
* Downsamples the coverage of a variable region approximately (guarantees the minimum to be equal to this).
@ -260,8 +267,14 @@ public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, Red
read = ReadClipper.hardClipAdaptorSequence(read); // Strip away adaptor sequences, if any.
if (!DONT_CLIP_LOW_QUAL_TAILS)
read = ReadClipper.hardClipLowQualEnds(read, minTailQuality); // Clip low quality tails
if (!isWholeGenome())
mappedReads = hardClipReadToInterval(read); // Hard clip the remainder of the read to the desired interval
if (!isWholeGenome()) {
if (HARD_CLIP_TO_INTERVAL)
mappedReads = hardClipReadToInterval(read); // Hard clip the remainder of the read to the desired interval
else {
mappedReads = new LinkedList<GATKSAMRecord>();
mappedReads.add(read);
}
}
else {
mappedReads = new LinkedList<GATKSAMRecord>();
if (!read.isEmpty())

View File

@ -21,28 +21,28 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testDefaultCompression() {
RRTest("testDefaultCompression ", L, "4c92d59d4a5292af1f968dc922c2c63e");
RRTest("testDefaultCompression ", L, "323dd4deabd7767efa0f2c6e7fa4189f");
}
@Test(enabled = true)
public void testMultipleIntervals() {
String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
RRTest("testMultipleIntervals ", intervals, "97d5c3fda5551741676793ba325ec7ed");
RRTest("testMultipleIntervals ", intervals, "c437fb160547ff271f8eba30e5f3ff76");
}
@Test(enabled = true)
public void testHighCompression() {
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "e6bc1cd0e9de961cf0fb1789bf6ab108");
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "3a607bc3ebaf84e9dc44e005c5f8a047");
}
@Test(enabled = true)
public void testLowCompression() {
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "f33ec7cd0b98eebd73d1025ca656cd7e");
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "afd39459c841b68a442abdd5ef5f8f27");
}
@Test(enabled = true)
public void testIndelCompression() {
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "e6e2bc889e4f342a7fedc5d38b391d20");
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f7b9fa44c10bc4b2247813d2b8dc1973");
}
@Test(enabled = true)
@ -62,7 +62,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testAddingReadAfterTailingTheStash() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("022931f032a4122cfe41e58e74d0aede")));
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("886b43e1f26ff18425814dc7563931c6")));
}
}

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
@ -33,7 +34,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( mendelianViolation == null ) {
if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) {
if (checkAndSetSamples(((Walker) walker).getSampleDB())) {
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
}
else {

View File

@ -0,0 +1,61 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
 * Binary covariate allows BQSR to recalibrate based on a binary covariate stored in a BAM tag.
 * The tag must provide one value per base of the read, and each value should be 0 or 1
 * (see {@link #maximumKeyValue()}).
 *
 * @author Mauricio Carneiro
 * @since 7/6/12
 */
public class BinaryTagCovariate implements ExperimentalCovariate {

    private String tag;  // name of the BAM tag holding the per-base binary values

    @Override
    public void initialize(RecalibrationArgumentCollection RAC) {
        tag = RAC.BINARY_TAG_NAME;
    }

    /**
     * Reads the binary tag off the read and records one covariate key per base.
     *
     * @param read   the read carrying the binary tag
     * @param values receives the covariate keys (same key for mismatch/insertion/deletion tables)
     * @throws UserException if the tag is missing, of an unsupported type, or shorter than the read
     */
    @Override
    public void recordValues(GATKSAMRecord read, ReadCovariates values) {
        final Object tagObject = read.getAttribute(tag);
        final byte[] binaryTag;
        if (tagObject == null)
            throw new UserException(String.format("Binary tag %s is missing from read %s", tag, read.getReadName()));
        if (tagObject instanceof byte[])
            binaryTag = (byte[]) tagObject;  // fast path: tag already stored as a byte array
        else if (tagObject instanceof String) {
            final String tagString = (String) tagObject;  // slow path: decode each character as a byte
            binaryTag = new byte[tagString.length()];
            for (int i = 0; i < binaryTag.length; i++)
                binaryTag[i] = Byte.decode(tagString.substring(i, i + 1));
        }
        else
            throw new UserException("Binary tag is not a byte array (fast) or a string (slow). Type not supported");

        // Guard against a malformed tag with fewer values than the read has bases: fail with a
        // clear message instead of an ArrayIndexOutOfBoundsException inside the loop below.
        if (binaryTag.length < read.getReadLength())
            throw new UserException(String.format("Binary tag %s has %d values but read %s has %d bases", tag, binaryTag.length, read.getReadName(), read.getReadLength()));

        for (int i = 0; i < read.getReadLength(); i++) {
            values.addCovariate((int) binaryTag[i], (int) binaryTag[i], (int) binaryTag[i], i);
        }
    }

    /**
     * Parses a covariate value read back from a recalibration report.
     */
    @Override
    public Object getValue(String str) {
        return Integer.decode(str);
    }

    /**
     * Formats a covariate key for output (plain decimal).
     */
    @Override
    public String formatKey(int key) {
        return String.format("%d", key);
    }

    @Override
    public int keyFromValue(Object value) {
        return Integer.decode((String) value);
    }

    /**
     * Keys are binary, so the maximum key value is 1.
     */
    @Override
    public int maximumKeyValue() {
        return 1;
    }
}

View File

@ -55,7 +55,7 @@ public interface Covariate {
public void recordValues(final GATKSAMRecord read, final ReadCovariates values);
/**
* Used to get the covariate's value from input csv file during on-the-fly recalibration
* Used to get the covariate's value from input (Recalibration Report) file during on-the-fly recalibration
*
* @param str the key in string type (read from the csv)
* @return the key in it's correct type.

View File

@ -153,6 +153,11 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output")
public int QUANTIZING_LEVELS = 16;
/**
* The tag name for the binary tag covariate (if using it)
*/
@Argument(fullName = "binary_tag_name", shortName = "bintag", required = false, doc = "the binary tag covariate name if using it")
public String BINARY_TAG_NAME = null;
@Hidden
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
@ -205,6 +210,8 @@ public class RecalibrationArgumentCollection {
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true);
argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
return argumentsTable;
}

View File

@ -290,6 +290,9 @@ public class RecalibrationReport {
else if (argument.equals("recalibration_report"))
RAC.recalibrationReport = (value == null) ? null : new File((String) value);
else if (argument.equals("binary_tag_name"))
RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;
}
return RAC;

View File

@ -27,15 +27,12 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;
import org.apache.log4j.Logger;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
@ -69,7 +66,7 @@ public class VCFDiffableReader implements DiffableReader {
FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), vcfCodec, false);
VCFHeader header = (VCFHeader)reader.getHeader();
for ( VCFHeaderLine headerLine : header.getMetaData() ) {
for ( VCFHeaderLine headerLine : header.getMetaDataInInputOrder() ) {
String key = headerLine.getKey();
if ( headerLine instanceof VCFIDHeaderLine)
key += "_" + ((VCFIDHeaderLine) headerLine).getID();

View File

@ -198,21 +198,19 @@ public class PairHMMIndelErrorModel {
}
}
else {
final int refWindowStart = ref.getWindow().getStart();
final int refWindowStop = ref.getWindow().getStop();
if (DEBUG) {
System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString());
}
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read.isEmpty())
continue;
if (read.getSoftEnd() > ref.getWindow().getStop())
if (!read.isEmpty() && (read.getSoftEnd() > refWindowStop && read.getSoftStart() < refWindowStop))
read = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, ref.getWindow().getStop());
if (read.isEmpty())
continue;
if (read.getSoftStart() < ref.getWindow().getStart())
if (!read.isEmpty() && (read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart))
read = ReadClipper.hardClipByReferenceCoordinatesLeftTail (read, ref.getWindow().getStart());
if (read.isEmpty())

View File

@ -102,7 +102,7 @@ class MergeSegregatingAlternateAllelesVCFWriter implements VariantContextWriter
if (useSingleSample != null) { // only want to output context for one sample
Set<String> singSampSet = new TreeSet<String>();
singSampSet.add(useSingleSample);
header = new VCFHeader(header.getMetaData(), singSampSet);
header = new VCFHeader(header.getMetaDataInSortedOrder(), singSampSet);
}
innerWriter.writeHeader(header);

View File

@ -225,10 +225,10 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
}
if( !dataManager.checkHasTrainingSet() ) {
throw new UserException.CommandLineException( "No training set found! Please provide sets of known polymorphic loci marked with the training=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
throw new UserException.CommandLineException( "No training set found! Please provide sets of known polymorphic loci marked with the training=true ROD binding tag. For example, -resource:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
}
if( !dataManager.checkHasTruthSet() ) {
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -resource:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
}
@ -463,7 +463,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
stream.println("dev.off()");
stream.println("if (exists(\"compactPDF\")) {");
stream.println("compactPDF(ouputPDF)");
stream.println("compactPDF(outputPDF)");
stream.println("}");
stream.close();

View File

@ -166,7 +166,6 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="suppressCommandLineHeader", shortName="suppressCommandLineHeader", doc="If true, do not output the header containing the command line used", required=false)
public boolean SUPPRESS_COMMAND_LINE_HEADER = false;
@Hidden
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false)
public boolean MERGE_INFO_WITH_MAX_AC = false;

View File

@ -63,7 +63,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaData() : null, samples);
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaDataInSortedOrder() : null, samples);
writer.writeHeader(vcfHeader);
}

View File

@ -92,7 +92,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
Set<VCFHeaderLine> headerLines = vcfHeaders.get(trackName).getMetaData();
Set<VCFHeaderLine> headerLines = vcfHeaders.get(trackName).getMetaDataInSortedOrder();
baseWriter.writeHeader(new VCFHeader(headerLines, samples));
writer = VariantContextWriterFactory.sortOnTheFly(baseWriter, 200);

View File

@ -95,7 +95,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
if ( vcfHeaders.containsKey(trackName) )
metaData.addAll(vcfHeaders.get(trackName).getMetaData());
metaData.addAll(vcfHeaders.get(trackName).getMetaDataInSortedOrder());
if ( RECORD_ORIGINAL_LOCATION ) {
metaData.add(new VCFInfoHeaderLine("OriginalChr", 1, VCFHeaderLineType.String, "Original contig name for the record"));
metaData.add(new VCFInfoHeaderLine("OriginalStart", 1, VCFHeaderLineType.Integer, "Original start position for the record"));

View File

@ -385,10 +385,14 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
if ( value == null )
builder.unfiltered();
else {
if ( value instanceof Integer )
if ( value instanceof Integer ) {
// fast path for single integer result
builder.filter(getDictionaryString((Integer)value));
else {
final String filterString = getDictionaryString((Integer)value);
if ( VCFConstants.PASSES_FILTERS_v4.equals(filterString))
builder.passFilters();
else
builder.filter(filterString);
} else {
for ( final int offset : (List<Integer>)value )
builder.filter(getDictionaryString(offset));
}

View File

@ -26,10 +26,7 @@ package org.broadinstitute.sting.utils.codecs.bcf2;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFIDHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.*;
@ -84,8 +81,8 @@ public final class BCF2Utils {
boolean sawPASS = false;
// set up the strings dictionary
for ( VCFHeaderLine line : header.getMetaData() ) {
if ( line instanceof VCFIDHeaderLine) {
for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) {
final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
if ( ! seen.contains(idLine.getID())) {
sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);

View File

@ -115,7 +115,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
protected VCFHeader parseHeaderFromLines( final List<String> headerStrings, final VCFHeaderVersion version ) {
this.version = version;
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
Set<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine>();
Set<String> sampleNames = new LinkedHashSet<String>();
int contigCounter = 0;
// iterate over all the passed in strings

View File

@ -52,7 +52,7 @@ public class VCFHeader {
}
// the associated meta data
private final Set<VCFHeaderLine> mMetaData = new TreeSet<VCFHeaderLine>();
private final Set<VCFHeaderLine> mMetaData = new LinkedHashSet<VCFHeaderLine>();
private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new HashMap<String, VCFInfoHeaderLine>();
private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new HashMap<String, VCFFormatHeaderLine>();
private final Map<String, VCFFilterHeaderLine> mFilterMetaData = new HashMap<String, VCFFilterHeaderLine>();
@ -230,14 +230,22 @@ public class VCFHeader {
}
/**
* get the meta data, associated with this header
* get the meta data, associated with this header, in sorted order
*
* @return a set of the meta data
*/
public Set<VCFHeaderLine> getMetaData() {
Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>();
public Set<VCFHeaderLine> getMetaDataInInputOrder() {
return makeGetMetaDataSet(mMetaData);
}
public Set<VCFHeaderLine> getMetaDataInSortedOrder() {
return makeGetMetaDataSet(new TreeSet<VCFHeaderLine>(mMetaData));
}
private static Set<VCFHeaderLine> makeGetMetaDataSet(final Set<VCFHeaderLine> headerLinesInSomeOrder) {
final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>();
lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_1.getFormatString(), VCFHeaderVersion.VCF4_1.getVersionString()));
lines.addAll(mMetaData);
lines.addAll(headerLinesInSomeOrder);
return Collections.unmodifiableSet(lines);
}
@ -247,7 +255,7 @@ public class VCFHeader {
* @return
*/
public VCFHeaderLine getMetaDataLine(final String key) {
for (final VCFHeaderLine line: getMetaData()) {
for (final VCFHeaderLine line: mMetaData) {
if ( line.getKey().equals(key) )
return line;
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -60,8 +59,8 @@ public class VCFStandardHeaderLines {
@Requires("header != null")
@Ensures("result != null")
public static VCFHeader repairStandardHeaderLines(final VCFHeader header) {
final Set<VCFHeaderLine> newLines = new LinkedHashSet<VCFHeaderLine>(header.getMetaData().size());
for ( VCFHeaderLine line : header.getMetaData() ) {
final Set<VCFHeaderLine> newLines = new LinkedHashSet<VCFHeaderLine>(header.getMetaDataInInputOrder().size());
for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
if ( line instanceof VCFFormatHeaderLine ) {
line = formatStandards.repair((VCFFormatHeaderLine) line);
} else if ( line instanceof VCFInfoHeaderLine) {

View File

@ -25,8 +25,6 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
@ -34,7 +32,6 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
@ -129,7 +126,7 @@ public class VCFUtils {
if ( source.getRecordType().equals(VariantContext.class)) {
VCFHeader header = (VCFHeader)source.getHeader();
if ( header != null )
fields.addAll(header.getMetaData());
fields.addAll(header.getMetaDataInSortedOrder());
}
}
@ -160,7 +157,7 @@ public class VCFUtils {
// todo -- needs to remove all version headers from sources and add its own VCF version line
for ( VCFHeader source : headers ) {
//System.out.printf("Merging in header %s%n", source);
for ( VCFHeaderLine line : source.getMetaData()) {
for ( VCFHeaderLine line : source.getMetaDataInSortedOrder()) {
String key = line.getKey();
if ( line instanceof VCFIDHeaderLine )
@ -250,7 +247,7 @@ public class VCFUtils {
* @param refDict the SAM formatted reference sequence dictionary
*/
public final static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaData(), referenceFile, refDict), oldHeader.getGenotypeSamples());
return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict), oldHeader.getGenotypeSamples());
}
public final static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict) {

View File

@ -260,6 +260,7 @@ public class VariantContextBuilder {
return this;
}
@Requires({"filter != null", "!filter.equals(\"PASS\")"})
public VariantContextBuilder filter(final String filter) {
if ( this.filters == null ) this.filters = new LinkedHashSet<String>(1);
this.filters.add(filter);

View File

@ -644,7 +644,9 @@ public class VariantContextUtils {
if ( setKey != null ) {
attributes.put(setKey, setValue);
if( mergeInfoWithMaxAC && vcWithMaxAC != null ) { attributesWithMaxAC.put(setKey, vcWithMaxAC.getSource()); }
if( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
attributesWithMaxAC.put(setKey, setValue);
}
}
}

View File

@ -268,7 +268,12 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
// treat errors as no match
jexl.put(exp, value == null ? false : value);
} catch (Exception e) {
throw new UserException.CommandLineException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage()));
// if exception happens because variable is undefined (i.e. field in expression is not present), evaluate to FALSE
// todo - might be safer if we explicitly checked for an exception type, but Apache's API doesn't seem to have that ability
if (e.getMessage().contains("undefined variable"))
jexl.put(exp,false);
else
throw new UserException.CommandLineException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage()));
}
}

View File

@ -109,7 +109,10 @@ class BCF2Writer extends IndexingVariantContextWriter {
// --------------------------------------------------------------------------------
@Override
public void writeHeader(final VCFHeader header) {
public void writeHeader(VCFHeader header) {
// make sure the header is sorted correctly
header = new VCFHeader(header.getMetaDataInSortedOrder(), header.getGenotypeSamples());
// create the config offsets map
if ( header.getContigLines().isEmpty() ) {
if ( ALLOW_MISSING_CONTIG_LINES ) {

View File

@ -87,13 +87,13 @@ class VCFWriter extends IndexingVariantContextWriter {
final boolean doNotWriteGenotypes,
final String versionLine,
final String streamNameForError) {
header = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
header = doNotWriteGenotypes ? new VCFHeader(header.getMetaDataInSortedOrder()) : header;
try {
// the file format field needs to be written first
writer.write(versionLine + "\n");
for ( VCFHeaderLine line : header.getMetaData() ) {
for ( VCFHeaderLine line : header.getMetaDataInSortedOrder() ) {
if ( VCFHeaderVersion.isFormatString(line.getKey()) )
continue;

View File

@ -70,6 +70,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testComplexSelection--" + testfile, spec);
}
@Test
public void testNonExistingFieldSelection() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -env -ef -select 'foo!=0||DP>0' --variant " + testfile),
1,
Arrays.asList("44e77cea624cfff2b8acc3a4b30485cb") // should yield empty vcf because the foo!=0 will yield complete expression false
);
spec.disableShadowBCF();
executeTest("testNonExistingSelection--" + testfile, spec);
}
@Test
public void testSampleExclusion() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";

View File

@ -119,7 +119,6 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
if (!clipLeft.isEmpty()) {
// System.out.println(String.format("Left Tail [%d]: %s (%d,%d,%d : %d,%d,%d) -> %s (%d,%d,%d : %d,%d,%d)", i, cigar.toString(), read.getUnclippedStart(), read.getSoftStart(), read.getAlignmentStart(), read.getAlignmentEnd(), read.getSoftEnd(), read.getUnclippedEnd(), clipLeft.getCigarString(), clipLeft.getUnclippedStart(), clipLeft.getSoftStart(), clipLeft.getAlignmentStart(), clipLeft.getAlignmentEnd(), clipLeft.getSoftEnd(), clipLeft.getUnclippedEnd()));
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
assertUnclippedLimits(read, clipLeft);
}
@ -137,7 +136,7 @@ public class ReadClipperUnitTest extends BaseTest {
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
for (int i = alnStart; i <= alnEnd; i++) {
GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i);
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
assertUnclippedLimits(read, clipRight);
}
@ -278,7 +277,6 @@ public class ReadClipperUnitTest extends BaseTest {
private void checkClippedReadsForLowQualEnds(GATKSAMRecord read, GATKSAMRecord clippedRead, byte lowQual, int nLowQualBases) {
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
assertNoLowQualBases(clippedRead, lowQual); // Make sure the low qualities are gone
assertNumberOfBases(read, clippedRead, nLowQualBases); // Make sure only low quality bases were clipped
}
/**
@ -294,12 +292,6 @@ public class ReadClipperUnitTest extends BaseTest {
}
}
private void assertNumberOfBases(GATKSAMRecord read, GATKSAMRecord clipLeft, int nLowQualBases) {
if (read.getCigarString().contains("M"))
Assert.assertEquals(clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
}
private boolean startsWithInsertion(Cigar cigar) {
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
}

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.codecs.vcf;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
@ -26,7 +25,7 @@ public class VCFHeaderUnitTest extends BaseTest {
private VCFHeader createHeader(String headerStr) {
VCFCodec codec = new VCFCodec();
VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr))));
Assert.assertEquals(header.getMetaData().size(), VCF4headerStringCount);
Assert.assertEquals(header.getMetaDataInInputOrder().size(), VCF4headerStringCount);
return header;
}
@ -98,7 +97,7 @@ public class VCFHeaderUnitTest extends BaseTest {
} catch (IOException e) {
Assert.fail("Unable to make a temp file!");
}
for (VCFHeaderLine line : header.getMetaData())
for (VCFHeaderLine line : header.getMetaDataInSortedOrder())
pw.println(line);
pw.close();
Assert.assertEquals(md5SumFile(myTempFile), md5sum);

View File

@ -59,7 +59,7 @@ public class VCFIntegrationTest extends WalkerTest {
executeTest("Test writing samtools WEx BCF example", spec1);
}
@Test(enabled = false) // TODO disabled because current BCF2 is 1 based
@Test(enabled = false)
public void testReadingSamtoolsWExBCFExample() {
String testVCF = privateTestDir + "ex2.bcf";
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";

View File

@ -106,7 +106,7 @@ public class VariantContextTestProvider {
for ( final VariantContext vc : vcs )
if ( vc.hasGenotypes() )
samples.addAll(vc.getSampleNames());
this.header = samples.isEmpty() ? header : new VCFHeader(header.getMetaData(), samples);
this.header = samples.isEmpty() ? header : new VCFHeader(header.getMetaDataInSortedOrder(), samples);
this.vcs = vcs;
}
@ -885,12 +885,12 @@ public class VariantContextTestProvider {
}
public static void assertEquals(final VCFHeader actual, final VCFHeader expected) {
Assert.assertEquals(actual.getMetaData().size(), expected.getMetaData().size(), "No VCF header lines");
Assert.assertEquals(actual.getMetaDataInSortedOrder().size(), expected.getMetaDataInSortedOrder().size(), "No VCF header lines");
// for some reason set.equals() is returning false but all paired elements are .equals(). Perhaps compare to is busted?
//Assert.assertEquals(actual.getMetaData(), expected.getMetaData());
final List<VCFHeaderLine> actualLines = new ArrayList<VCFHeaderLine>(actual.getMetaData());
final List<VCFHeaderLine> expectedLines = new ArrayList<VCFHeaderLine>(expected.getMetaData());
//Assert.assertEquals(actual.getMetaDataInInputOrder(), expected.getMetaDataInInputOrder());
final List<VCFHeaderLine> actualLines = new ArrayList<VCFHeaderLine>(actual.getMetaDataInSortedOrder());
final List<VCFHeaderLine> expectedLines = new ArrayList<VCFHeaderLine>(expected.getMetaDataInSortedOrder());
for ( int i = 0; i < actualLines.size(); i++ ) {
Assert.assertEquals(actualLines.get(i), expectedLines.get(i), "VCF header lines");
}

View File

@ -39,9 +39,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
@ -166,7 +163,7 @@ public class VCFWriterUnitTest extends BaseTest {
Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[index], field);
index++;
}
Assert.assertEquals(header.getMetaData().size(), metaData.size());
Assert.assertEquals(header.getMetaDataInSortedOrder().size(), metaData.size());
index = 0;
for (String key : header.getGenotypeSamples()) {
Assert.assertTrue(additionalColumns.contains(key));