Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
75e5a50b8a
|
|
@ -154,6 +154,13 @@ public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, Red
|
|||
@Argument(fullName = "dont_compress_read_names", shortName = "nocmp_names", doc = "", required = false)
|
||||
protected boolean DONT_COMPRESS_READ_NAMES = false;
|
||||
|
||||
/**
|
||||
* Optionally hard clip all incoming reads to the desired intervals. The hard clips will happen exactly at the interval
|
||||
* border.
|
||||
*/
|
||||
@Argument(fullName = "hard_clip_to_interval", shortName = "clip_int", doc = "", required = false)
|
||||
protected boolean HARD_CLIP_TO_INTERVAL = false;
|
||||
|
||||
/**
|
||||
* Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be
|
||||
* considered consensus.
|
||||
|
|
@ -166,7 +173,7 @@ public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, Red
|
|||
* considered consensus.
|
||||
*/
|
||||
@Argument(fullName = "minimum_del_proportion_to_trigger_variant", shortName = "mindel", doc = "", required = false)
|
||||
protected double minIndelProportionToTriggerVariant = 0.01;
|
||||
protected double minIndelProportionToTriggerVariant = 0.05;
|
||||
|
||||
/**
|
||||
* Downsamples the coverage of a variable region approximately (guarantees the minimum to be equal to this).
|
||||
|
|
@ -260,8 +267,14 @@ public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, Red
|
|||
read = ReadClipper.hardClipAdaptorSequence(read); // Strip away adaptor sequences, if any.
|
||||
if (!DONT_CLIP_LOW_QUAL_TAILS)
|
||||
read = ReadClipper.hardClipLowQualEnds(read, minTailQuality); // Clip low quality tails
|
||||
if (!isWholeGenome())
|
||||
mappedReads = hardClipReadToInterval(read); // Hard clip the remainder of the read to the desired interval
|
||||
if (!isWholeGenome()) {
|
||||
if (HARD_CLIP_TO_INTERVAL)
|
||||
mappedReads = hardClipReadToInterval(read); // Hard clip the remainder of the read to the desired interval
|
||||
else {
|
||||
mappedReads = new LinkedList<GATKSAMRecord>();
|
||||
mappedReads.add(read);
|
||||
}
|
||||
}
|
||||
else {
|
||||
mappedReads = new LinkedList<GATKSAMRecord>();
|
||||
if (!read.isEmpty())
|
||||
|
|
|
|||
|
|
@ -21,28 +21,28 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testDefaultCompression() {
|
||||
RRTest("testDefaultCompression ", L, "4c92d59d4a5292af1f968dc922c2c63e");
|
||||
RRTest("testDefaultCompression ", L, "323dd4deabd7767efa0f2c6e7fa4189f");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMultipleIntervals() {
|
||||
String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
|
||||
RRTest("testMultipleIntervals ", intervals, "97d5c3fda5551741676793ba325ec7ed");
|
||||
RRTest("testMultipleIntervals ", intervals, "c437fb160547ff271f8eba30e5f3ff76");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHighCompression() {
|
||||
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "e6bc1cd0e9de961cf0fb1789bf6ab108");
|
||||
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "3a607bc3ebaf84e9dc44e005c5f8a047");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testLowCompression() {
|
||||
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "f33ec7cd0b98eebd73d1025ca656cd7e");
|
||||
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "afd39459c841b68a442abdd5ef5f8f27");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testIndelCompression() {
|
||||
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "e6e2bc889e4f342a7fedc5d38b391d20");
|
||||
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f7b9fa44c10bc4b2247813d2b8dc1973");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -62,7 +62,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testAddingReadAfterTailingTheStash() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
|
||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("022931f032a4122cfe41e58e74d0aede")));
|
||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("886b43e1f26ff18425814dc7563931c6")));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
|
|
@ -33,7 +34,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
|||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( mendelianViolation == null ) {
|
||||
if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) {
|
||||
if (checkAndSetSamples(((Walker) walker).getSampleDB())) {
|
||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,61 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Binary covariate allows BQSR to recalibrate based on a binary covariate in the BAM file. This covariate should assume values of 1 and 0.
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 7/6/12
|
||||
*/
|
||||
public class BinaryTagCovariate implements ExperimentalCovariate {
|
||||
|
||||
private String tag;
|
||||
|
||||
@Override
|
||||
public void initialize(RecalibrationArgumentCollection RAC) {
|
||||
tag = RAC.BINARY_TAG_NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void recordValues(GATKSAMRecord read, ReadCovariates values) {
|
||||
final Object tagObject = read.getAttribute(tag);
|
||||
|
||||
byte[] binaryTag;
|
||||
if (tagObject instanceof byte[])
|
||||
binaryTag = (byte[]) tagObject;
|
||||
else if (tagObject instanceof String) {
|
||||
int readLength = ((String) tagObject).length();
|
||||
binaryTag = new byte[readLength];
|
||||
for (int i = 0; i<readLength; i++)
|
||||
binaryTag[i] = Byte.decode(((String) tagObject).substring(i, i+1));
|
||||
}
|
||||
else
|
||||
throw new UserException("Binary tag is not a byte array (fast) or a string (slow). Type not supported");
|
||||
|
||||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
values.addCovariate((int) binaryTag[i], (int) binaryTag[i], (int) binaryTag[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getValue(String str) {
|
||||
return Integer.decode(str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(int key) {
|
||||
return String.format("%d", key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int keyFromValue(Object value) {
|
||||
return Integer.decode((String) value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maximumKeyValue() {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -55,7 +55,7 @@ public interface Covariate {
|
|||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values);
|
||||
|
||||
/**
|
||||
* Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
* Used to get the covariate's value from input (Recalibration Report) file during on-the-fly recalibration
|
||||
*
|
||||
* @param str the key in string type (read from the csv)
|
||||
* @return the key in it's correct type.
|
||||
|
|
|
|||
|
|
@ -153,6 +153,11 @@ public class RecalibrationArgumentCollection {
|
|||
@Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output")
|
||||
public int QUANTIZING_LEVELS = 16;
|
||||
|
||||
/**
|
||||
* The tag name for the binary tag covariate (if using it)
|
||||
*/
|
||||
@Argument(fullName = "binary_tag_name", shortName = "bintag", required = false, doc = "the binary tag covariate name if using it")
|
||||
public String BINARY_TAG_NAME = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
||||
|
|
@ -205,6 +210,8 @@ public class RecalibrationArgumentCollection {
|
|||
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
|
||||
argumentsTable.addRowID("recalibration_report", true);
|
||||
argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
|
||||
argumentsTable.addRowID("binary_tag_name", true);
|
||||
argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
|
||||
return argumentsTable;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -290,6 +290,9 @@ public class RecalibrationReport {
|
|||
|
||||
else if (argument.equals("recalibration_report"))
|
||||
RAC.recalibrationReport = (value == null) ? null : new File((String) value);
|
||||
|
||||
else if (argument.equals("binary_tag_name"))
|
||||
RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;
|
||||
}
|
||||
|
||||
return RAC;
|
||||
|
|
|
|||
|
|
@ -27,15 +27,12 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.AbstractFeatureReader;
|
||||
import org.broad.tribble.FeatureReader;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -69,7 +66,7 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
|
||||
FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), vcfCodec, false);
|
||||
VCFHeader header = (VCFHeader)reader.getHeader();
|
||||
for ( VCFHeaderLine headerLine : header.getMetaData() ) {
|
||||
for ( VCFHeaderLine headerLine : header.getMetaDataInInputOrder() ) {
|
||||
String key = headerLine.getKey();
|
||||
if ( headerLine instanceof VCFIDHeaderLine)
|
||||
key += "_" + ((VCFIDHeaderLine) headerLine).getID();
|
||||
|
|
|
|||
|
|
@ -198,21 +198,19 @@ public class PairHMMIndelErrorModel {
|
|||
}
|
||||
}
|
||||
else {
|
||||
final int refWindowStart = ref.getWindow().getStart();
|
||||
final int refWindowStop = ref.getWindow().getStop();
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString());
|
||||
}
|
||||
|
||||
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
|
||||
|
||||
if (read.isEmpty())
|
||||
continue;
|
||||
|
||||
if (read.getSoftEnd() > ref.getWindow().getStop())
|
||||
if (!read.isEmpty() && (read.getSoftEnd() > refWindowStop && read.getSoftStart() < refWindowStop))
|
||||
read = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, ref.getWindow().getStop());
|
||||
|
||||
if (read.isEmpty())
|
||||
continue;
|
||||
|
||||
if (read.getSoftStart() < ref.getWindow().getStart())
|
||||
if (!read.isEmpty() && (read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart))
|
||||
read = ReadClipper.hardClipByReferenceCoordinatesLeftTail (read, ref.getWindow().getStart());
|
||||
|
||||
if (read.isEmpty())
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ class MergeSegregatingAlternateAllelesVCFWriter implements VariantContextWriter
|
|||
if (useSingleSample != null) { // only want to output context for one sample
|
||||
Set<String> singSampSet = new TreeSet<String>();
|
||||
singSampSet.add(useSingleSample);
|
||||
header = new VCFHeader(header.getMetaData(), singSampSet);
|
||||
header = new VCFHeader(header.getMetaDataInSortedOrder(), singSampSet);
|
||||
}
|
||||
|
||||
innerWriter.writeHeader(header);
|
||||
|
|
|
|||
|
|
@ -225,10 +225,10 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
}
|
||||
|
||||
if( !dataManager.checkHasTrainingSet() ) {
|
||||
throw new UserException.CommandLineException( "No training set found! Please provide sets of known polymorphic loci marked with the training=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||
throw new UserException.CommandLineException( "No training set found! Please provide sets of known polymorphic loci marked with the training=true ROD binding tag. For example, -resource:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||
}
|
||||
if( !dataManager.checkHasTruthSet() ) {
|
||||
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -resource:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -463,7 +463,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
stream.println("dev.off()");
|
||||
|
||||
stream.println("if (exists(\"compactPDF\")) {");
|
||||
stream.println("compactPDF(ouputPDF)");
|
||||
stream.println("compactPDF(outputPDF)");
|
||||
stream.println("}");
|
||||
|
||||
stream.close();
|
||||
|
|
|
|||
|
|
@ -166,7 +166,6 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="suppressCommandLineHeader", shortName="suppressCommandLineHeader", doc="If true, do not output the header containing the command line used", required=false)
|
||||
public boolean SUPPRESS_COMMAND_LINE_HEADER = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false)
|
||||
public boolean MERGE_INFO_WITH_MAX_AC = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
|
|||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
|
||||
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
|
||||
|
||||
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaData() : null, samples);
|
||||
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaDataInSortedOrder() : null, samples);
|
||||
writer.writeHeader(vcfHeader);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
|
||||
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
|
||||
|
||||
Set<VCFHeaderLine> headerLines = vcfHeaders.get(trackName).getMetaData();
|
||||
Set<VCFHeaderLine> headerLines = vcfHeaders.get(trackName).getMetaDataInSortedOrder();
|
||||
baseWriter.writeHeader(new VCFHeader(headerLines, samples));
|
||||
|
||||
writer = VariantContextWriterFactory.sortOnTheFly(baseWriter, 200);
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||
if ( vcfHeaders.containsKey(trackName) )
|
||||
metaData.addAll(vcfHeaders.get(trackName).getMetaData());
|
||||
metaData.addAll(vcfHeaders.get(trackName).getMetaDataInSortedOrder());
|
||||
if ( RECORD_ORIGINAL_LOCATION ) {
|
||||
metaData.add(new VCFInfoHeaderLine("OriginalChr", 1, VCFHeaderLineType.String, "Original contig name for the record"));
|
||||
metaData.add(new VCFInfoHeaderLine("OriginalStart", 1, VCFHeaderLineType.Integer, "Original start position for the record"));
|
||||
|
|
|
|||
|
|
@ -385,10 +385,14 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
if ( value == null )
|
||||
builder.unfiltered();
|
||||
else {
|
||||
if ( value instanceof Integer )
|
||||
if ( value instanceof Integer ) {
|
||||
// fast path for single integer result
|
||||
builder.filter(getDictionaryString((Integer)value));
|
||||
else {
|
||||
final String filterString = getDictionaryString((Integer)value);
|
||||
if ( VCFConstants.PASSES_FILTERS_v4.equals(filterString))
|
||||
builder.passFilters();
|
||||
else
|
||||
builder.filter(filterString);
|
||||
} else {
|
||||
for ( final int offset : (List<Integer>)value )
|
||||
builder.filter(getDictionaryString(offset));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,10 +26,7 @@ package org.broadinstitute.sting.utils.codecs.bcf2;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFIDHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.*;
|
||||
|
|
@ -84,8 +81,8 @@ public final class BCF2Utils {
|
|||
|
||||
boolean sawPASS = false;
|
||||
// set up the strings dictionary
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line instanceof VCFIDHeaderLine) {
|
||||
for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
|
||||
if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) {
|
||||
final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
|
||||
if ( ! seen.contains(idLine.getID())) {
|
||||
sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
protected VCFHeader parseHeaderFromLines( final List<String> headerStrings, final VCFHeaderVersion version ) {
|
||||
this.version = version;
|
||||
|
||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||
Set<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine>();
|
||||
Set<String> sampleNames = new LinkedHashSet<String>();
|
||||
int contigCounter = 0;
|
||||
// iterate over all the passed in strings
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ public class VCFHeader {
|
|||
}
|
||||
|
||||
// the associated meta data
|
||||
private final Set<VCFHeaderLine> mMetaData = new TreeSet<VCFHeaderLine>();
|
||||
private final Set<VCFHeaderLine> mMetaData = new LinkedHashSet<VCFHeaderLine>();
|
||||
private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new HashMap<String, VCFInfoHeaderLine>();
|
||||
private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new HashMap<String, VCFFormatHeaderLine>();
|
||||
private final Map<String, VCFFilterHeaderLine> mFilterMetaData = new HashMap<String, VCFFilterHeaderLine>();
|
||||
|
|
@ -230,14 +230,22 @@ public class VCFHeader {
|
|||
}
|
||||
|
||||
/**
|
||||
* get the meta data, associated with this header
|
||||
* get the meta data, associated with this header, in sorted order
|
||||
*
|
||||
* @return a set of the meta data
|
||||
*/
|
||||
public Set<VCFHeaderLine> getMetaData() {
|
||||
Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>();
|
||||
public Set<VCFHeaderLine> getMetaDataInInputOrder() {
|
||||
return makeGetMetaDataSet(mMetaData);
|
||||
}
|
||||
|
||||
public Set<VCFHeaderLine> getMetaDataInSortedOrder() {
|
||||
return makeGetMetaDataSet(new TreeSet<VCFHeaderLine>(mMetaData));
|
||||
}
|
||||
|
||||
private static Set<VCFHeaderLine> makeGetMetaDataSet(final Set<VCFHeaderLine> headerLinesInSomeOrder) {
|
||||
final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>();
|
||||
lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_1.getFormatString(), VCFHeaderVersion.VCF4_1.getVersionString()));
|
||||
lines.addAll(mMetaData);
|
||||
lines.addAll(headerLinesInSomeOrder);
|
||||
return Collections.unmodifiableSet(lines);
|
||||
}
|
||||
|
||||
|
|
@ -247,7 +255,7 @@ public class VCFHeader {
|
|||
* @return
|
||||
*/
|
||||
public VCFHeaderLine getMetaDataLine(final String key) {
|
||||
for (final VCFHeaderLine line: getMetaData()) {
|
||||
for (final VCFHeaderLine line: mMetaData) {
|
||||
if ( line.getKey().equals(key) )
|
||||
return line;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -60,8 +59,8 @@ public class VCFStandardHeaderLines {
|
|||
@Requires("header != null")
|
||||
@Ensures("result != null")
|
||||
public static VCFHeader repairStandardHeaderLines(final VCFHeader header) {
|
||||
final Set<VCFHeaderLine> newLines = new LinkedHashSet<VCFHeaderLine>(header.getMetaData().size());
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
final Set<VCFHeaderLine> newLines = new LinkedHashSet<VCFHeaderLine>(header.getMetaDataInInputOrder().size());
|
||||
for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
|
||||
if ( line instanceof VCFFormatHeaderLine ) {
|
||||
line = formatStandards.repair((VCFFormatHeaderLine) line);
|
||||
} else if ( line instanceof VCFInfoHeaderLine) {
|
||||
|
|
|
|||
|
|
@ -25,8 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
@ -34,7 +32,6 @@ import org.broad.tribble.Feature;
|
|||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -129,7 +126,7 @@ public class VCFUtils {
|
|||
if ( source.getRecordType().equals(VariantContext.class)) {
|
||||
VCFHeader header = (VCFHeader)source.getHeader();
|
||||
if ( header != null )
|
||||
fields.addAll(header.getMetaData());
|
||||
fields.addAll(header.getMetaDataInSortedOrder());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -160,7 +157,7 @@ public class VCFUtils {
|
|||
// todo -- needs to remove all version headers from sources and add its own VCF version line
|
||||
for ( VCFHeader source : headers ) {
|
||||
//System.out.printf("Merging in header %s%n", source);
|
||||
for ( VCFHeaderLine line : source.getMetaData()) {
|
||||
for ( VCFHeaderLine line : source.getMetaDataInSortedOrder()) {
|
||||
|
||||
String key = line.getKey();
|
||||
if ( line instanceof VCFIDHeaderLine )
|
||||
|
|
@ -250,7 +247,7 @@ public class VCFUtils {
|
|||
* @param refDict the SAM formatted reference sequence dictionary
|
||||
*/
|
||||
public final static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
|
||||
return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaData(), referenceFile, refDict), oldHeader.getGenotypeSamples());
|
||||
return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict), oldHeader.getGenotypeSamples());
|
||||
}
|
||||
|
||||
public final static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict) {
|
||||
|
|
|
|||
|
|
@ -260,6 +260,7 @@ public class VariantContextBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
@Requires({"filter != null", "!filter.equals(\"PASS\")"})
|
||||
public VariantContextBuilder filter(final String filter) {
|
||||
if ( this.filters == null ) this.filters = new LinkedHashSet<String>(1);
|
||||
this.filters.add(filter);
|
||||
|
|
|
|||
|
|
@ -644,7 +644,9 @@ public class VariantContextUtils {
|
|||
|
||||
if ( setKey != null ) {
|
||||
attributes.put(setKey, setValue);
|
||||
if( mergeInfoWithMaxAC && vcWithMaxAC != null ) { attributesWithMaxAC.put(setKey, vcWithMaxAC.getSource()); }
|
||||
if( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
|
||||
attributesWithMaxAC.put(setKey, setValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -268,7 +268,12 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
|||
// treat errors as no match
|
||||
jexl.put(exp, value == null ? false : value);
|
||||
} catch (Exception e) {
|
||||
throw new UserException.CommandLineException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage()));
|
||||
// if exception happens because variable is undefined (i.e. field in expression is not present), evaluate to FALSE
|
||||
// todo - might be safer if we explicitly checked for an exception type, but Apache's API doesn't seem to have that ability
|
||||
if (e.getMessage().contains("undefined variable"))
|
||||
jexl.put(exp,false);
|
||||
else
|
||||
throw new UserException.CommandLineException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -109,7 +109,10 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public void writeHeader(final VCFHeader header) {
|
||||
public void writeHeader(VCFHeader header) {
|
||||
// make sure the header is sorted correctly
|
||||
header = new VCFHeader(header.getMetaDataInSortedOrder(), header.getGenotypeSamples());
|
||||
|
||||
// create the config offsets map
|
||||
if ( header.getContigLines().isEmpty() ) {
|
||||
if ( ALLOW_MISSING_CONTIG_LINES ) {
|
||||
|
|
|
|||
|
|
@ -87,13 +87,13 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
final boolean doNotWriteGenotypes,
|
||||
final String versionLine,
|
||||
final String streamNameForError) {
|
||||
header = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
|
||||
header = doNotWriteGenotypes ? new VCFHeader(header.getMetaDataInSortedOrder()) : header;
|
||||
|
||||
try {
|
||||
// the file format field needs to be written first
|
||||
writer.write(versionLine + "\n");
|
||||
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
for ( VCFHeaderLine line : header.getMetaDataInSortedOrder() ) {
|
||||
if ( VCFHeaderVersion.isFormatString(line.getKey()) )
|
||||
continue;
|
||||
|
||||
|
|
|
|||
|
|
@ -70,6 +70,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("testComplexSelection--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonExistingFieldSelection() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -env -ef -select 'foo!=0||DP>0' --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("44e77cea624cfff2b8acc3a4b30485cb") // should yield empty vcf because the foo!=0 will yield complete expression false
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testNonExistingSelection--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSampleExclusion() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
|
|
|
|||
|
|
@ -119,7 +119,6 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
|
||||
|
||||
if (!clipLeft.isEmpty()) {
|
||||
// System.out.println(String.format("Left Tail [%d]: %s (%d,%d,%d : %d,%d,%d) -> %s (%d,%d,%d : %d,%d,%d)", i, cigar.toString(), read.getUnclippedStart(), read.getSoftStart(), read.getAlignmentStart(), read.getAlignmentEnd(), read.getSoftEnd(), read.getUnclippedEnd(), clipLeft.getCigarString(), clipLeft.getUnclippedStart(), clipLeft.getSoftStart(), clipLeft.getAlignmentStart(), clipLeft.getAlignmentEnd(), clipLeft.getSoftEnd(), clipLeft.getUnclippedEnd()));
|
||||
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
||||
assertUnclippedLimits(read, clipLeft);
|
||||
}
|
||||
|
|
@ -137,7 +136,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||
for (int i = alnStart; i <= alnEnd; i++) {
|
||||
GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i);
|
||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
||||
assertUnclippedLimits(read, clipRight);
|
||||
}
|
||||
|
|
@ -278,7 +277,6 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
private void checkClippedReadsForLowQualEnds(GATKSAMRecord read, GATKSAMRecord clippedRead, byte lowQual, int nLowQualBases) {
|
||||
assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
|
||||
assertNoLowQualBases(clippedRead, lowQual); // Make sure the low qualities are gone
|
||||
assertNumberOfBases(read, clippedRead, nLowQualBases); // Make sure only low quality bases were clipped
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -294,12 +292,6 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
private void assertNumberOfBases(GATKSAMRecord read, GATKSAMRecord clipLeft, int nLowQualBases) {
|
||||
if (read.getCigarString().contains("M"))
|
||||
Assert.assertEquals(clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
||||
}
|
||||
|
||||
|
||||
private boolean startsWithInsertion(Cigar cigar) {
|
||||
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.codecs.vcf;
|
|||
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
|
|
@ -26,7 +25,7 @@ public class VCFHeaderUnitTest extends BaseTest {
|
|||
private VCFHeader createHeader(String headerStr) {
|
||||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr))));
|
||||
Assert.assertEquals(header.getMetaData().size(), VCF4headerStringCount);
|
||||
Assert.assertEquals(header.getMetaDataInInputOrder().size(), VCF4headerStringCount);
|
||||
return header;
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +97,7 @@ public class VCFHeaderUnitTest extends BaseTest {
|
|||
} catch (IOException e) {
|
||||
Assert.fail("Unable to make a temp file!");
|
||||
}
|
||||
for (VCFHeaderLine line : header.getMetaData())
|
||||
for (VCFHeaderLine line : header.getMetaDataInSortedOrder())
|
||||
pw.println(line);
|
||||
pw.close();
|
||||
Assert.assertEquals(md5SumFile(myTempFile), md5sum);
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
executeTest("Test writing samtools WEx BCF example", spec1);
|
||||
}
|
||||
|
||||
@Test(enabled = false) // TODO disabled because current BCF2 is 1 based
|
||||
@Test(enabled = false)
|
||||
public void testReadingSamtoolsWExBCFExample() {
|
||||
String testVCF = privateTestDir + "ex2.bcf";
|
||||
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ public class VariantContextTestProvider {
|
|||
for ( final VariantContext vc : vcs )
|
||||
if ( vc.hasGenotypes() )
|
||||
samples.addAll(vc.getSampleNames());
|
||||
this.header = samples.isEmpty() ? header : new VCFHeader(header.getMetaData(), samples);
|
||||
this.header = samples.isEmpty() ? header : new VCFHeader(header.getMetaDataInSortedOrder(), samples);
|
||||
this.vcs = vcs;
|
||||
}
|
||||
|
||||
|
|
@ -885,12 +885,12 @@ public class VariantContextTestProvider {
|
|||
}
|
||||
|
||||
public static void assertEquals(final VCFHeader actual, final VCFHeader expected) {
|
||||
Assert.assertEquals(actual.getMetaData().size(), expected.getMetaData().size(), "No VCF header lines");
|
||||
Assert.assertEquals(actual.getMetaDataInSortedOrder().size(), expected.getMetaDataInSortedOrder().size(), "No VCF header lines");
|
||||
|
||||
// for some reason set.equals() is returning false but all paired elements are .equals(). Perhaps compare to is busted?
|
||||
//Assert.assertEquals(actual.getMetaData(), expected.getMetaData());
|
||||
final List<VCFHeaderLine> actualLines = new ArrayList<VCFHeaderLine>(actual.getMetaData());
|
||||
final List<VCFHeaderLine> expectedLines = new ArrayList<VCFHeaderLine>(expected.getMetaData());
|
||||
//Assert.assertEquals(actual.getMetaDataInInputOrder(), expected.getMetaDataInInputOrder());
|
||||
final List<VCFHeaderLine> actualLines = new ArrayList<VCFHeaderLine>(actual.getMetaDataInSortedOrder());
|
||||
final List<VCFHeaderLine> expectedLines = new ArrayList<VCFHeaderLine>(expected.getMetaDataInSortedOrder());
|
||||
for ( int i = 0; i < actualLines.size(); i++ ) {
|
||||
Assert.assertEquals(actualLines.get(i), expectedLines.get(i), "VCF header lines");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,9 +39,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -166,7 +163,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[index], field);
|
||||
index++;
|
||||
}
|
||||
Assert.assertEquals(header.getMetaData().size(), metaData.size());
|
||||
Assert.assertEquals(header.getMetaDataInSortedOrder().size(), metaData.size());
|
||||
index = 0;
|
||||
for (String key : header.getGenotypeSamples()) {
|
||||
Assert.assertTrue(additionalColumns.contains(key));
|
||||
|
|
|
|||
Loading…
Reference in New Issue