diff --git a/ivy.xml b/ivy.xml
index f76880b94..5a8c3986a 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -97,7 +97,7 @@
-
+
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
index 94ed23caf..c201e95f0 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
@@ -289,7 +289,7 @@ public abstract class ArgumentTypeDescriptor {
return field.isAnnotationPresent(Hidden.class);
}
- public Class makeRawTypeIfNecessary(Type t) {
+ public static Class makeRawTypeIfNecessary(Type t) {
if ( t == null )
return null;
else if ( t instanceof ParameterizedType )
@@ -300,6 +300,115 @@ public abstract class ArgumentTypeDescriptor {
throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t);
}
}
+
+ /**
+ * The actual argument parsing method.
+ * @param source source
+ * @param type type to check
+ * @param matches matches
+ * @return the RodBinding/IntervalBinding object, depending on the binding class given by type.
+ */
+ protected Object parseBinding(ArgumentSource source, Type type, ArgumentMatches matches, Tags tags) {
+ ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
+ String value = getArgumentValue(defaultDefinition, matches);
+ @SuppressWarnings("unchecked")
+ Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
+ String name = defaultDefinition.fullName;
+
+ return parseBinding(value, parameterType, type, name, tags, source.field.getName());
+ }
+
+ /**
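+ * Parses a command-line value and its tags into a binding object, which is constructed reflectively from bindingClass.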
+ * @param value The source of the binding
+ * @param parameterType The Tribble Feature parameter type
+ * @param bindingClass The class type for the binding (ex: RodBinding, IntervalBinding, etc.) Must have the correct constructor for creating the binding.
+ * @param bindingName The name of the binding passed to the constructor.
+ * @param tags Tags for the binding used for parsing and passed to the constructor.
+ * @param fieldName The name of the field that was parsed. Used for error reporting.
+ * @return The newly created binding object of type bindingClass.
+ */
+ public static Object parseBinding(String value, Class<? extends Feature> parameterType, Type bindingClass,
+ String bindingName, Tags tags, String fieldName) {
+ try {
+ String tribbleType = null;
+ // at most two positional tags are allowed here
+ if ( tags.getPositionalTags().size() > 2 ) {
+ throw new UserException.CommandLineException(
+ String.format("Unexpected number of positional tags for argument %s : %s. " +
+ "Rod bindings only support -X:type and -X:name,type argument styles",
+ value, fieldName));
+ } else if ( tags.getPositionalTags().size() == 2 ) {
+ // -X:name,type style
+ bindingName = tags.getPositionalTags().get(0);
+ tribbleType = tags.getPositionalTags().get(1);
+
+ FeatureManager manager = new FeatureManager();
+ if ( manager.getByName(tribbleType) == null )
+ throw new UserException.UnknownTribbleType(
+ tribbleType,
+ String.format("Unable to find tribble type '%s' provided on the command line. " +
+ "Please select a correct type from among the supported types:%n%s",
+ tribbleType, manager.userFriendlyListOfAvailableFeatures(parameterType)));
+
+ } else {
+ // case with 0 or 1 positional tags
+ FeatureManager manager = new FeatureManager();
+
+ // -X:type style is a type when we cannot determine the type dynamically
+ String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
+ if ( tag1 != null ) {
+ if ( manager.getByName(tag1) != null ) // this is a type
+ tribbleType = tag1;
+ else
+ bindingName = tag1;
+ }
+
+ if ( tribbleType == null ) {
+ // try to determine the file type dynamically
+ File file = new File(value);
+ if ( file.canRead() && file.isFile() ) {
+ FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
+ if ( featureDescriptor != null ) {
+ tribbleType = featureDescriptor.getName();
+ logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
+ }
+ }
+
+ if ( tribbleType == null ) {
+ // IntervalBinding can be created from a normal String
+ Class rawType = (makeRawTypeIfNecessary(bindingClass));
+ try {
+ return rawType.getConstructor(String.class).newInstance(value);
+ } catch (NoSuchMethodException e) {
+ /* no String-only constructor on this binding class; fall through to the file checks below */
+ }
+
+ if ( ! file.exists() ) {
+ throw new UserException.CouldNotReadInputFile(file, "file does not exist");
+ } else if ( ! file.canRead() || ! file.isFile() ) {
+ throw new UserException.CouldNotReadInputFile(file, "file could not be read");
+ } else {
+ throw new UserException.CommandLineException(
+ String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
+ "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
+ manager.userFriendlyListOfAvailableFeatures(parameterType)));
+ }
+ }
+ }
+ }
+
+ Constructor ctor = (makeRawTypeIfNecessary(bindingClass)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
+ return ctor.newInstance(parameterType, bindingName, value, tribbleType, tags);
+ } catch (Exception e) {
+ if ( e instanceof UserException )
+ throw ((UserException)e);
+ else
+ throw new UserException.CommandLineException(
+ String.format("Failed to parse value %s for argument %s. Message: %s",
+ value, fieldName, e.getMessage()));
+ }
+ }
}
/**
@@ -324,6 +433,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public boolean createsTypeDefault(ArgumentSource source) { return ! source.isRequired(); }
@Override
+ @SuppressWarnings("unchecked")
public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
Class parameterType = JVMUtils.getParameterizedTypeClass(type);
return RodBinding.makeUnbound((Class<? extends Feature>)parameterType);
@@ -336,118 +446,16 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
- return parse(parsingEngine, source, type, matches, false);
- }
-
- /**
- * The actual argument parsing method.
- *
- * IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided
- * that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of
- * this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding
- * is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the
- * usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other
- * side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically
- * continue to input them out of order (whereas Tribble Features are ordered).
- *
- * @param parsingEngine parsing engine
- * @param source source
- * @param type type to check
- * @param matches matches
- * @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding?
- * @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
- */
- public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) {
- ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
- String value = getArgumentValue( defaultDefinition, matches );
- Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
-
- try {
- String name = defaultDefinition.fullName;
- String tribbleType = null;
- Tags tags = getArgumentTags(matches);
- // must have one or two tag values here
- if ( tags.getPositionalTags().size() > 2 ) {
- throw new UserException.CommandLineException(
- String.format("Unexpected number of positional tags for argument %s : %s. " +
- "Rod bindings only support -X:type and -X:name,type argument styles",
- value, source.field.getName()));
- } if ( tags.getPositionalTags().size() == 2 ) {
- // -X:name,type style
- name = tags.getPositionalTags().get(0);
- tribbleType = tags.getPositionalTags().get(1);
- } else {
- // case with 0 or 1 positional tags
- FeatureManager manager = new FeatureManager();
-
- // -X:type style is a type when we cannot determine the type dynamically
- String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
- if ( tag1 != null ) {
- if ( manager.getByName(tag1) != null ) // this a type
- tribbleType = tag1;
- else
- name = tag1;
- }
-
- if ( tribbleType == null ) {
- // try to determine the file type dynamically
- File file = new File(value);
- if ( file.canRead() && file.isFile() ) {
- FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
- if ( featureDescriptor != null ) {
- tribbleType = featureDescriptor.getName();
- logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
- }
- }
-
- if ( tribbleType == null ) {
- // IntervalBindings allow streaming conversion of Strings
- if ( createIntervalBinding ) {
- return new IntervalBinding(value);
- }
-
- if ( ! file.exists() ) {
- throw new UserException.CouldNotReadInputFile(file, "file does not exist");
- } else if ( ! file.canRead() || ! file.isFile() ) {
- throw new UserException.CouldNotReadInputFile(file, "file could not be read");
- } else {
- throw new UserException.CommandLineException(
- String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
- "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
- manager.userFriendlyListOfAvailableFeatures(parameterType)));
- }
- }
- }
- }
-
- Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
- Object result;
- if ( createIntervalBinding ) {
- result = ctor.newInstance(parameterType, name, value, tribbleType, tags);
- } else {
- RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
- parsingEngine.addTags(rbind, tags);
- parsingEngine.addRodBinding(rbind);
- result = rbind;
- }
- return result;
- } catch (InvocationTargetException e) {
- throw new UserException.CommandLineException(
- String.format("Failed to parse value %s for argument %s.",
- value, source.field.getName()));
- } catch (Exception e) {
- if ( e instanceof UserException )
- throw ((UserException)e);
- else
- throw new UserException.CommandLineException(
- String.format("Failed to parse value %s for argument %s. Message: %s",
- value, source.field.getName(), e.getMessage()));
- }
+ Tags tags = getArgumentTags(matches);
+ RodBinding rbind = (RodBinding)parseBinding(source, type, matches, tags);
+ parsingEngine.addTags(rbind, tags);
+ parsingEngine.addRodBinding(rbind);
+ return rbind;
}
}
/**
- * Parser for RodBinding objects
+ * Parser for IntervalBinding objects
*/
class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
/**
@@ -475,7 +483,7 @@ class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
*/
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
- return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true);
+ return parseBinding(source, type, matches, getArgumentTags(matches));
}
}
@@ -783,7 +791,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
Class<? extends Multiplexer> multiplexerType = dependentArgument.field.getAnnotation(Multiplex.class).value();
- Constructor<? extends Multiplexer> multiplexerConstructor = null;
+ Constructor<? extends Multiplexer> multiplexerConstructor;
try {
multiplexerConstructor = multiplexerType.getConstructor(sourceTypes);
multiplexerConstructor.setAccessible(true);
@@ -792,7 +800,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
throw new ReviewedStingException(String.format("Unable to find constructor for class %s with parameters %s",multiplexerType.getName(),Arrays.deepToString(sourceFields)),ex);
}
- Multiplexer multiplexer = null;
+ Multiplexer multiplexer;
try {
multiplexer = multiplexerConstructor.newInstance(sourceValues);
}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java
index 452309e89..26af49e12 100755
--- a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java
@@ -78,24 +78,7 @@ public abstract class ParsingMethod {
String argument = matcher.group(1).trim();
- Tags tags = new Tags();
- if(matcher.group(2) != null) {
- for(String tag: Utils.split(matcher.group(2),",")) {
- // Check for presence of an '=' sign, indicating a key-value pair in the tag line.
- int equalDelimiterPos = tag.indexOf('=');
- if(equalDelimiterPos >= 0) {
- // Sanity check; ensure that there aren't multiple '=' in this key-value pair.
- if(tag.indexOf('=',equalDelimiterPos+1) >= 0)
- throw new ArgumentException(String.format("Tag %s passed to argument %s is malformed. Please ensure that " +
- "key-value tags are of the form <key>=<value>, and neither key " +
- "nor value contain the '=' character", tag, argument));
- tags.addKeyValueTag(tag.substring(0,equalDelimiterPos),tag.substring(equalDelimiterPos+1));
- }
- else
- tags.addPositionalTag(tag);
-
- }
- }
+ Tags tags = parseTags(argument, matcher.group(2));
// Find the most appropriate argument definition for the given argument.
ArgumentDefinition argumentDefinition = definitions.findArgumentDefinition( argument, definitionMatcher );
@@ -105,6 +88,28 @@ public abstract class ParsingMethod {
return new ArgumentMatch(argument,argumentDefinition,position,tags);
}
+ public static Tags parseTags(String argument, String tagString) {
+ Tags tags = new Tags();
+ if (tagString != null) {
+ for(String tag: Utils.split(tagString, ",")) {
+ // Check for presence of an '=' sign, indicating a key-value pair in the tag line.
+ int equalDelimiterPos = tag.indexOf('=');
+ if(equalDelimiterPos >= 0) {
+ // Sanity check; ensure that there aren't multiple '=' in this key-value pair.
+ if(tag.indexOf('=',equalDelimiterPos+1) >= 0)
+ throw new ArgumentException(String.format("Tag %s passed to argument %s is malformed. Please ensure that " +
+ "key-value tags are of the form <key>=<value>, and neither key " +
+ "nor value contain the '=' character", tag, argument));
+ tags.addKeyValueTag(tag.substring(0,equalDelimiterPos),tag.substring(equalDelimiterPos+1));
+ }
+ else
+ tags.addPositionalTag(tag);
+
+ }
+ }
+ return tags;
+ }
+
/**
* A command-line argument always starts with an alphabetical character or underscore followed by any word character.
*/
diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 6fa70f437..68680dd10 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -30,7 +30,6 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
-import org.broad.tribble.Feature;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
@@ -54,9 +53,9 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
@@ -582,7 +581,6 @@ public class GenomeAnalysisEngine {
* Setup the intervals to be processed
*/
protected void initializeIntervals() {
-
// return if no interval arguments at all
if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
return;
@@ -590,17 +588,22 @@ public class GenomeAnalysisEngine {
// Note that the use of '-L all' is no longer supported.
// if include argument isn't given, create new set of all possible intervals
- GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
- GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
- loadIntervals(argCollection.intervals, argCollection.intervalSetRule, argCollection.intervalPadding));
+
+ Pair<GenomeLocSortedSet, GenomeLocSortedSet> includeExcludePair = IntervalUtils.parseIntervalBindingsPair(
+ this.referenceDataSource,
+ argCollection.intervals,
+ argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding,
+ argCollection.excludeIntervals);
+
+ GenomeLocSortedSet includeSortedSet = includeExcludePair.getFirst();
+ GenomeLocSortedSet excludeSortedSet = includeExcludePair.getSecond();
// if no exclude arguments, can return parseIntervalArguments directly
- if ( argCollection.excludeIntervals == null )
+ if ( excludeSortedSet == null )
intervals = includeSortedSet;
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
else {
- GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION);
intervals = includeSortedSet.subtractRegions(excludeSortedSet);
// logging messages only printed when exclude (-XL) arguments are given
@@ -613,43 +616,6 @@ public class GenomeAnalysisEngine {
}
}
- /**
- * Loads the intervals relevant to the current execution
- * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
- * @param rule interval merging rule
- * @return A sorted, merged list of all intervals specified in this arg list.
- */
- protected GenomeLocSortedSet loadIntervals( final List<IntervalBinding<Feature>> argList, final IntervalSetRule rule ) {
- return loadIntervals(argList, rule, 0);
- }
-
- /**
- * Loads the intervals relevant to the current execution
- * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
- * @param rule interval merging rule
- * @param padding how much to pad the intervals
- * @return A sorted, merged list of all intervals specified in this arg list.
- */
- protected GenomeLocSortedSet loadIntervals( final List<IntervalBinding<Feature>> argList, final IntervalSetRule rule, final int padding ) {
-
- List allIntervals = new ArrayList();
- for ( IntervalBinding intervalBinding : argList ) {
- List intervals = intervalBinding.getIntervals(this.getGenomeLocParser());
-
- if ( intervals.isEmpty() ) {
- logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
- }
-
- if ( padding > 0 ) {
- intervals = IntervalUtils.getIntervalsWithFlanks(this.getGenomeLocParser(), intervals, padding);
- }
-
- allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
- }
-
- return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
- }
-
/**
* Add additional, externally managed IO streams for inputs.
*
@@ -830,7 +796,8 @@ public class GenomeAnalysisEngine {
throw new UserException.CouldNotReadInputFile(getArguments().repairVCFHeader, e);
}
}
- RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,header,validationExclusionType);
+
+ RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, header, validationExclusionType);
List dataSources = new ArrayList();
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
@@ -854,6 +821,15 @@ public class GenomeAnalysisEngine {
return readsDataSource.getHeader();
}
+ public boolean lenientVCFProcessing() {
+ return lenientVCFProcessing(argCollection.unsafe);
+ }
+
+ public static boolean lenientVCFProcessing(final ValidationExclusion.TYPE val) {
+ return val == ValidationExclusion.TYPE.ALL
+ || val == ValidationExclusion.TYPE.LENIENT_VCF_PROCESSING;
+ }
+
/**
* Returns the unmerged SAM file header for an individual reader.
* @param reader The reader.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index babbb7ab8..13c737a2e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -347,9 +347,6 @@ public class GATKArgumentCollection {
public boolean USE_SLOW_GENOTYPES = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
- @Argument(fullName="allowMissingVCFHeaders",shortName = "allowMissingVCFHeaders",doc="If provided, the GATK will write out VCF files that contain INFO, FILTER, and FORMAT fields not found in the VCF header",required=false)
- public boolean allowMissingVCFHeaders = false;
-
/**
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java
index 577f7929a..52c77326a 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/ValidationExclusion.java
@@ -40,6 +40,7 @@ public class ValidationExclusion {
ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set
NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file
ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompabilities
+ LENIENT_VCF_PROCESSING, // allow non-standard values for standard VCF header lines. Don't worry about size differences between header and values, etc.
@EnumerationArgumentDefault // set the ALL value to the default value, so if they specify just -U, we get the ALL
ALL // do not check for all of the above conditions, DEFAULT
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
index d0fdae639..fb05a6b04 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
@@ -107,8 +107,10 @@ public class VariantContextWriterStorage implements Storage, Var
List options = new ArrayList();
if ( doNotWriteGenotypes ) options.add(Options.DO_NOT_WRITE_GENOTYPES);
- if ( engine.getArguments().allowMissingVCFHeaders ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
+ if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
index 3f03b30dd..b5d5deedb 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
@@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.classloader.PluginManager;
+import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@@ -85,16 +86,18 @@ public class FeatureManager {
private final PluginManager pluginManager;
private final Collection featureDescriptors = new TreeSet();
private final VCFHeader headerForRepairs;
+ private final boolean lenientVCFProcessing;
/**
* Construct a FeatureManager without a master VCF header
*/
public FeatureManager() {
- this(null);
+ this(null, false);
}
- public FeatureManager(final VCFHeader headerForRepairs) {
+ public FeatureManager(final VCFHeader headerForRepairs, final boolean lenientVCFProcessing) {
this.headerForRepairs = headerForRepairs;
+ this.lenientVCFProcessing = lenientVCFProcessing;
pluginManager = new PluginManager(FeatureCodec.class, "Codecs", "Codec");
for (final String rawName: pluginManager.getPluginsByName().keySet()) {
@@ -252,8 +255,11 @@ public class FeatureManager {
((NameAwareCodec)codex).setName(name);
if ( codex instanceof ReferenceDependentFeatureCodec )
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
- if ( codex instanceof VCFCodec)
+ if ( codex instanceof VCFCodec )
((VCFCodec)codex).setHeaderForRepairs(headerForRepairs);
+ if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing )
+ ((AbstractVCFCodec)codex).disableOnTheFlyModifications();
+
return codex;
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java
index 25e005601..e183fe169 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java
@@ -34,6 +34,7 @@ import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.commandline.Tags;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
@@ -98,7 +99,7 @@ public class RMDTrackBuilder { // extends PluginManager {
this.dict = dict;
this.validationExclusionType = validationExclusionType;
this.genomeLocParser = genomeLocParser;
- this.featureManager = new FeatureManager(headerForRepairs);
+ this.featureManager = new FeatureManager(headerForRepairs, GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java
index d91ddd221..01fa92b8c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java
@@ -71,11 +71,13 @@ public class BQSRGatherer extends Gatherer {
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
File recal_out = new File(output.getName() + ".original");
RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
- RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getKeysAndTablesMap(), generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
+ // TODO -- fix me: recalibration plot generation is disabled while the call below is commented out
+ //RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getKeysAndTablesMap(), generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
}
else if (!RAC.NO_PLOTS) {
File recal_out = new File(output.getName() + ".recal");
- RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
+ // TODO -- fix me: recalibration plot generation is disabled while the call below is commented out
+ //RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
}
generalReport.output(outputFile);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java
deleted file mode 100644
index 29eecfbb1..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java
+++ /dev/null
@@ -1,329 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
-
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.exceptions.UserException;
-
-import java.util.*;
-
-/**
- * This class provides all the functionality for the BitSet representation of the keys to the hash table of BQSR
- *
- * It also handles the event type "covariate" which is not exactly a covariate, but is added as a key to the hashmap. The Key Manager will
- * add the event type as a bitset to the end of the covariate bitset key. This way, it won't get int the way of masking the information
- * out of the key for the actual covariates, and having the covariates handle it. The key manager handles the event type.
- *
- * The keys represented by this key manager will always have the same order:
- *
- * RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariate1, OptionalCovariateID, EventType
- * RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariate2, OptionalCovariateID, EventType
- * ...
- * RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariateN, OptionalCovariateID, EventType
- *
- *
- * Note that Optional Covariates are optional, and the Key Manager should operate without them if necessary.
- *
- * @author Mauricio Carneiro
- * @since 3/6/12
- */
-public class BQSRKeyManager {
-
- private final Covariate[] requiredCovariates;
- private final Covariate[] optionalCovariates;
- private final RequiredCovariateInfo[] requiredCovariatesInfo;
- private final OptionalCovariateInfo[] optionalCovariatesInfo;
- private final Map covariateNameToIDMap;
-
- private int nRequiredBits; // Number of bits used to represent the required covariates
-
- private final int optionalCovariateOffset;
- private final int optionalCovariateIDOffset;
-
- private final long optionalCovariateMask; // Standard mask for optional covariates key
- private final long optionalCovariateIDMask; // Standard mask for optional covariates order key
- private final long eventIDMask; // Standard mask for event ID
-
- /**
- * Initializes the KeyManager with the total number of covariates to use
- *
- * @param requiredCovariates the ordered list of required covariates
- * @param optionalCovariates the ordered list of optional covariates
- */
- public BQSRKeyManager(final List requiredCovariates, final List optionalCovariates) {
- this.requiredCovariates = new Covariate[requiredCovariates.size()];
- this.optionalCovariates = new Covariate[optionalCovariates.size()];
- requiredCovariatesInfo = new RequiredCovariateInfo[requiredCovariates.size()]; // initialize the required covariates list
- optionalCovariatesInfo = new OptionalCovariateInfo[optionalCovariates.size()]; // initialize the optional covariates list (size may be 0, it's okay)
- covariateNameToIDMap = new HashMap(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
-
- nRequiredBits = 0;
- for (int i = 0; i < requiredCovariates.size(); i++) { // create a list of required covariates with the extra information for key management
- final Covariate required = requiredCovariates.get(i);
- final int nBits = required.numberOfBits(); // number of bits used by this covariate
- final long mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
- this.requiredCovariates[i] = required;
- requiredCovariatesInfo[i] = new RequiredCovariateInfo(nBits, nRequiredBits, mask, required); // Create an object for this required covariate
- nRequiredBits += nBits;
- }
-
- final int bitsInEventType = numberOfBitsToRepresent(EventType.values().length);
- eventIDMask = genericMask(nRequiredBits, bitsInEventType);
-
- short id = 0;
- int nOptionalBits = 0;
- for (int i = 0; i < optionalCovariates.size(); i++) {
- final Covariate optional = optionalCovariates.get(i);
- nOptionalBits = Math.max(nOptionalBits, optional.numberOfBits()); // optional covariates are represented by the number of bits needed by biggest covariate
- this.optionalCovariates[i] = optional;
- optionalCovariatesInfo[i] = new OptionalCovariateInfo(id, optional);
- final String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
- covariateNameToIDMap.put(covariateName, id);
- id++;
- }
-
- optionalCovariateOffset = nRequiredBits + bitsInEventType;
- optionalCovariateMask = genericMask(optionalCovariateOffset, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset
- optionalCovariateIDOffset = nRequiredBits + bitsInEventType + nOptionalBits;
- final int nOptionalIDBits = numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID
- optionalCovariateIDMask = genericMask(optionalCovariateIDOffset, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset
-
- final int totalNumberOfBits = optionalCovariateIDOffset + nOptionalIDBits; // total number of bits used in the final key
- if ( totalNumberOfBits > 64 )
- throw new UserException.BadInput("The total number of bits used for the master BQSR key is greater than 64 and cannot be represented in a long");
- }
-
- /**
- * Generates one key given the optional covariate (or none if it is null)
- *
- * Keys include all required covariates, the standard covariate and the event type.
- *
- * @param allKeys The keys in long representation for each covariate (includes all optional covariates, not just the one requested)
- * @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions)
- * @return one key in long representation (non-negative) or -1 for a bad key
- */
- public long createMasterKey(final long[] allKeys, final EventType eventType, final int optionalCovariateIndex) {
-
- int keyIndex = 0;
- long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
- for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
- masterKey |= (allKeys[keyIndex++] << infoRequired.offset);
-
- final long eventKey = keyFromEvent(eventType); // create a key for the event type
- masterKey |= (eventKey << nRequiredBits);
-
- if (optionalCovariateIndex >= 0 && optionalCovariateIndex < optionalCovariates.length) {
- final long covariateKey = allKeys[keyIndex + optionalCovariateIndex];
- if (covariateKey < 0) // do not add "nulls" to the final set of keys
- return -1;
-
- masterKey |= (covariateKey << optionalCovariateOffset);
- masterKey |= (optionalCovariatesInfo[optionalCovariateIndex].covariateID << optionalCovariateIDOffset);
- }
-
- return masterKey;
- }
-
- /**
- * Generates one key for the covariates represented in Object[] key
- *
- * The covariates will have the actual objects produced by the covariates (probably read from the recalibration data file)
- * and will contain all required covariates and one (or none) optional covariates. Therefore, the product is one key, not many.
- *
- * Example key:
- * RG, QUAL, CYCLE, CYCLE_ID, EventType
- *
- * @param key list of objects produced by the required covariates followed by one or zero optional covariates.
- * @return a key representing these objects.
- */
- public long longFromKey(Object[] key) {
- int requiredCovariate = 0;
- long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
- for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
- masterKey |= (infoRequired.covariate.longFromKey(key[requiredCovariate++]) << infoRequired.offset);
-
- final int eventIndex = key.length - 1; // the event type is always the last key
- final long eventKey = keyFromEvent((EventType) key[eventIndex]); // create a key for the event type
- masterKey |= (eventKey << nRequiredBits);
-
- if (optionalCovariatesInfo.length > 0) {
- final int covariateIndex = requiredCovariatesInfo.length; // the optional covariate index in the key array
- final int covariateIDIndex = covariateIndex + 1; // the optional covariate ID index is right after the optional covariate's
- final short covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
- final OptionalCovariateInfo infoOptional = optionalCovariatesInfo[covariateID]; // so we can get the optional covariate information
-
- final long covariateKey = infoOptional.covariate.longFromKey(key[covariateIndex]); // convert the optional covariate key into a bitset using the covariate's interface
- masterKey |= (covariateKey << optionalCovariateOffset);
- masterKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
- }
-
- return masterKey;
- }
-
- /**
- * Covariate id can be either the covariate name (String) or the actual id (short). This method
- * finds it's type and converts accordingly to the short notation.
- *
- * @param id the string or short representation of the optional covariate id
- * @return the short representation of the optional covariate id.
- */
- private short parseCovariateID(final Object id) {
- return (id instanceof String) ? covariateNameToIDMap.get(id.toString()) : (Short) id;
- }
-
- /**
- * Generates a key set of objects from a combined master key.
- *
- * Masks out each covariate independently and decodes their values (Object) into a keyset
- *
- * @param master the master representation of the keys
- * @return an object array with the values for each key
- */
- public List