diff --git a/java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java
index a31a2595c..9f60127d8 100755
--- a/java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java
+++ b/java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java
@@ -14,7 +14,7 @@ import java.util.Map;
*
* A class representing a key=value entry for genotype FORMAT fields in the VCF header
*/
-public class VCFFormatHeaderLine extends VCFHeaderLine {
+public class VCFFormatHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
// the format field types
public enum FORMAT_TYPE {
diff --git a/java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java b/java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java
index 9aa7ae1dc..872f8a979 100644
--- a/java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java
+++ b/java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java
@@ -1,5 +1,7 @@
package org.broad.tribble.vcf;
+import org.broadinstitute.sting.utils.StingException;
+
import java.util.*;
/**
@@ -48,6 +50,9 @@ class VCF4Parser implements VCFLineParser {
for (Map.Entry entry : keyValues.entrySet()) {
if (start) start = false;
else builder.append(",");
+
+ if ( entry.getValue() == null ) throw new StingException("Header problem: unbound value at " + entry + " from " + keyValues);
+
builder.append(entry.getKey());
builder.append("=");
builder.append(entry.getValue().toString().contains(",") ||
diff --git a/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java
index 8a460879d..b788e8156 100755
--- a/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java
+++ b/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java
@@ -1,7 +1,5 @@
package org.broad.tribble.vcf;
-import org.broad.tribble.util.ParsingUtils;
-
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
@@ -14,7 +12,7 @@ import java.util.Map;
*
* A class representing a key=value entry for INFO fields in the VCF header
*/
-public class VCFInfoHeaderLine extends VCFHeaderLine {
+public class VCFInfoHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
// the info field types
public enum INFO_TYPE {
@@ -106,6 +104,7 @@ public class VCFInfoHeaderLine extends VCFHeaderLine {
mType == other.mType;
}
+ @Override
public String getmName() {
return mName;
}
diff --git a/java/src/org/broad/tribble/vcf/VCFNamedHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFNamedHeaderLine.java
new file mode 100755
index 000000000..8e9f1e20c
--- /dev/null
+++ b/java/src/org/broad/tribble/vcf/VCFNamedHeaderLine.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broad.tribble.vcf;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: depristo
+ * Date: Jul 2, 2010
+ * Time: 2:40:45 PM
+ * To change this template use File | Settings | File Templates.
+ */
+public interface VCFNamedHeaderLine {
+ String getmName();
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java
index 341ea8214..9784058e5 100644
--- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java
+++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java
@@ -175,6 +175,9 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
else if(inputFile.getName().toLowerCase().endsWith(".bam")) {
unpackedReads.add( inputFile );
}
+ else if(inputFile.getName().equals("-")) {
+ unpackedReads.add( new File("/dev/stdin") );
+ }
else {
Utils.scareUser(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 445884459..4ea246108 100755
--- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -728,9 +728,9 @@ public class GenomeAnalysisEngine {
ValidationExclusion exclusions) {
// Use monolithic sharding if no index is present. Monolithic sharding is always required for the original
// sharding system; it's required with the new sharding system only for locus walkers.
- if(readsDataSource != null && !readsDataSource.hasIndex() && walker instanceof LocusWalker) {
+ if(readsDataSource != null && !readsDataSource.hasIndex() ) {
if(!exclusions.contains(ValidationExclusion.TYPE.ALLOW_UNINDEXED_BAM) || intervals != null)
- throw new StingException("The GATK cannot currently process unindexed BAM files");
+ throw new StingException("The GATK cannot currently process unindexed BAM files without the -U ALLOW_UNINDEXED_BAM, or with unindexed BAM files with the -L option");
Shard.ShardType shardType;
if(walker instanceof LocusWalker) {
diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java
index 085515fc1..f4a261aa0 100755
--- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java
+++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java
@@ -165,7 +165,7 @@ public class VariantContextUtils {
}
public static VariantContext simpleMerge(Collection unsortedVCs) {
- return simpleMerge(unsortedVCs, null, EnumSet.of(MergeType.INTERSECT_VARIANTS, MergeType.UNSORTED_GENOTYPES), false);
+ return simpleMerge(unsortedVCs, null, EnumSet.of(MergeType.INTERSECT_VARIANTS, MergeType.UNSORTED_GENOTYPES), false, false);
}
@@ -179,7 +179,7 @@ public class VariantContextUtils {
* @param mergeOptions
* @return
*/
- public static VariantContext simpleMerge(Collection unsortedVCs, List priorityListOfVCs, EnumSet mergeOptions, boolean annotateOrigin ) {
+ public static VariantContext simpleMerge(Collection unsortedVCs, List priorityListOfVCs, EnumSet mergeOptions, boolean annotateOrigin, boolean printMessages ) {
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
return null;
@@ -256,7 +256,7 @@ public class VariantContextUtils {
attributes.put(VCFRecord.DEPTH_KEY, String.valueOf(depth));
VariantContext merged = new VariantContext(name, loc, alleles, genotypes, negLog10PError, filters, attributes);
- //if ( remapped ) System.out.printf("Remapped => %s%n", merged);
+ if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
return merged;
}
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java
index 43cfcba78..18797cd1b 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java
@@ -434,11 +434,8 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
double GTQual = VariantContext.NO_NEG_LOG_10PERROR;
Set genotypeFilters = null;
- String sampleName = sampleNameIterator.next();
-
-
- // todo -- the parsing of attributes could be made lazy for performance
Map gtAttributes = null;
+ String sampleName = sampleNameIterator.next();
// check to see if the value list is longer than the key list, which is a problem
if (nGTKeys < GTValueSplitSize)
@@ -457,8 +454,8 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
genotypeAlleleLocation = i;
else if (genotypeKeyArray[i].equals("GQ"))
GTQual = parseQual(GTValueArray[i]);
- else if (genotypeKeyArray[i].equals("FL")) // deal with genotype filters here
- genotypeFilters.addAll(parseFilters(GTValueArray[i]));
+ else if (genotypeKeyArray[i].equals("FT")) // deal with genotype filters here
+ genotypeFilters = parseFilters(GTValueArray[i]);
else
gtAttributes.put(genotypeKeyArray[i], GTValueArray[i]);
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
index 958db810e..456b0748f 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
@@ -25,8 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
-import org.broad.tribble.vcf.VCFHeader;
-import org.broad.tribble.vcf.VCFHeaderLine;
+import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
@@ -57,6 +56,9 @@ public class CombineVariants extends RodWalker {
@Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=true)
protected String PRIORITY_STRING = null;
+ @Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Print out interesting sites requiring complex compatibility merging", required=false)
+ protected boolean printComplexMerges = false;
+
private VCFWriter vcfWriter = null;
private List priority = null;
protected EnumSet mergeOptions;
@@ -72,16 +74,21 @@ public class CombineVariants extends RodWalker {
vcfWriter = new VCFWriter(out, true);
priority = new ArrayList(Arrays.asList(PRIORITY_STRING.split(",")));
+
+ // todo -- need to merge headers in an intelligent way
+
validateAnnotateUnionArguments(priority);
mergeOptions = COMBO_TYPE == ComboType.MERGE ? mergeTypeOptions : unionTypeOptions;
- Set samples = getSampleList(SampleUtils.getRodsWithVCFHeader(getToolkit(), null), mergeOptions);
+ Map vcfRods = SampleUtils.getRodsWithVCFHeader(getToolkit(), null);
+ Set samples = getSampleList(vcfRods, mergeOptions);
- Set metaData = new HashSet();
- metaData.add(new VCFHeaderLine("source", "VCFCombine"));
- vcfWriter.writeHeader(new VCFHeader(metaData, samples));
+ Set headerLines = smartMergeHeaders(vcfRods.values());
+ headerLines.add(new VCFHeaderLine("source", "CombineVariants"));
+ vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
}
- private Set getSampleList(Map headers, EnumSet mergeOptions ) {
+ // todo -- Eric, where's a better place to put this?
+ public static Set getSampleList(Map headers, EnumSet mergeOptions ) {
Set samples = new TreeSet();
for ( Map.Entry val : headers.entrySet() ) {
VCFHeader header = val.getValue();
@@ -93,6 +100,48 @@ public class CombineVariants extends RodWalker {
return samples;
}
+ // todo -- Eric, where's a better place to put this?
+ public static Set smartMergeHeaders(Collection headers) throws IllegalStateException {
+ HashMap map = new HashMap(); // from KEY.NAME -> line
+ HashSet lines = new HashSet();
+
+ for ( VCFHeader source : headers ) {
+ //System.out.printf("Merging in header %s%n", source);
+ for ( VCFHeaderLine line : source.getMetaData()) {
+ String key = line.getKey();
+ if ( line instanceof VCFNamedHeaderLine ) key = key + "." + ((VCFNamedHeaderLine) line).getmName();
+
+ if ( map.containsKey(key) ) {
+ VCFHeaderLine other = map.get(key);
+ if ( line.equals(other) )
+ continue;
+// System.out.printf("equals duplicate key %s%n", line);
+ else if ( ! line.getClass().equals(other.getClass()) )
+ throw new IllegalStateException("Incompatible header types: " + line + " " + other );
+ else if ( line instanceof VCFFilterHeaderLine ) {
+ String lineName = ((VCFFilterHeaderLine) line).getmName();
+ String otherName = ((VCFFilterHeaderLine) other).getmName();
+ if ( ! lineName.equals(otherName) )
+ throw new IllegalStateException("Incompatible header types: " + line + " " + other );
+ } else {
+ //String lineName = ((VCFInfoHeaderLine) line).getmName();
+ //String otherName = ((VCFFilterHeaderLine) other).getmName();
+
+ // todo -- aaron, please complete these comparisons when INFO and Format header lines are made into one
+ //if ( (lineType != null && ! lineType.equals(otherType)) || (lineCount != null && !lineCounts.equals(otherCount)))
+ // throw new IllegalStateException("Incompatible header types: " + line + " " + other );
+ }
+ } else {
+ map.put(key, line);
+ //System.out.printf("Adding header line %s%n", line);
+ }
+ }
+ }
+
+ return new HashSet(map.values());
+ }
+
+
private void validateAnnotateUnionArguments(List priority) {
Set rodNames = SampleUtils.getRodsNamesWithVCFHeader(getToolkit(), null);
if ( priority == null || rodNames.size() != priority.size() )
@@ -108,7 +157,7 @@ public class CombineVariants extends RodWalker {
// get all of the vcf rods at this locus
Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation());
- VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, mergeOptions, true);
+ VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, mergeOptions, true, printComplexMerges);
if ( mergedVC != null ) // only operate at the start of events
//if ( ! mergedVC.isMixed() ) // todo remove restriction when VCF4 writer is fixed
vcfWriter.add(mergedVC, ref.getBases());
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java
index f96e49752..65ff88cab 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java
@@ -28,6 +28,7 @@ public class VCFWriter {
BufferedWriter mWriter;
private boolean writingVCF40Format;
+ private String PASSES_FILTERS_STRING = null;
// our genotype sample fields
private static final List mGenotypeRecords = new ArrayList();
@@ -47,8 +48,8 @@ public class VCFWriter {
// default values
private static final String UNFILTERED = ".";
- private static final String PASSES_FILTERS = "0";
- private static final String PASSES_FILTERS_VCF_4_0 = "PASS";
+ private static final String PASSES_FILTERS_VCF3 = "0";
+ private static final String PASSES_FILTERS_VCF4 = "PASS";
private static final String EMPTY_INFO_FIELD = ".";
private static final String EMPTY_ID_FIELD = ".";
private static final String EMPTY_ALLELE_FIELD = ".";
@@ -66,12 +67,15 @@ public class VCFWriter {
public VCFWriter(File location, boolean useVCF4Format) {
this.writingVCF40Format = useVCF4Format;
+ this.PASSES_FILTERS_STRING = useVCF4Format ? PASSES_FILTERS_VCF4 : PASSES_FILTERS_VCF3;
+
FileOutputStream output;
try {
output = new FileOutputStream(location);
} catch (FileNotFoundException e) {
throw new RuntimeException("Unable to create VCF file at location: " + location);
}
+
mWriter = new BufferedWriter(new OutputStreamWriter(output));
}
@@ -87,6 +91,7 @@ public class VCFWriter {
}
public VCFWriter(OutputStream output, boolean useVCF4Format) {
this.writingVCF40Format = useVCF4Format;
+ this.PASSES_FILTERS_STRING = useVCF4Format ? PASSES_FILTERS_VCF4 : PASSES_FILTERS_VCF3;
mWriter = new BufferedWriter(new OutputStreamWriter(output));
}
@@ -238,8 +243,8 @@ public class VCFWriter {
// TODO- clean up these flags and associated code
boolean filtersWereAppliedToContext = true;
List allowedGenotypeAttributeKeys = null;
- boolean filtersWereAppliedToGenotypes = false;
- String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? PASSES_FILTERS_VCF_4_0 : UNFILTERED);
+ boolean filtersWereAppliedToGenotypes = true;
+ String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? PASSES_FILTERS_STRING : UNFILTERED);
Map alleleMap = new HashMap();
alleleMap.put(Allele.NO_CALL, new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE)); // convenience for lookup
@@ -251,8 +256,6 @@ public class VCFWriter {
ArrayList originalAlleles = (ArrayList)vc.getAttribute("ORIGINAL_ALLELE_LIST");
-
-
// search for reference allele and find trailing and padding at the end.
if (originalAlleles != null) {
Allele originalReferenceAllele = null;
@@ -339,7 +342,7 @@ public class VCFWriter {
if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) )
vcfGenotypeAttributeKeys.add(key);
}
- if ( filtersWereAppliedToGenotypes )
+ if ( filtersWereAppliedToGenotypes ) // todo -- should be calculated
vcfGenotypeAttributeKeys.add(VCFGenotypeRecord.GENOTYPE_FILTER_KEY);
}
String genotypeFormatString = Utils.join(GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys);
@@ -382,7 +385,7 @@ public class VCFWriter {
val = pileup.size();
} else if ( key.equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) ) {
// VCF 4.0 key for no filters is "."
- val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : UNFILTERED;
+ val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : PASSES_FILTERS_STRING;
}