Refactored and generalized the VCF header info code.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2346 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-13 21:02:45 +00:00
parent 05b8782d5f
commit 97618663ef
35 changed files with 373 additions and 136 deletions

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -61,5 +62,5 @@ public class AlleleBalance extends StandardVariantAnnotation {
public String getKeyName() { return "AB"; } public String getKeyName() { return "AB"; }
public String getDescription() { return "AB,1,Float,\"Allele Balance for hets (ref/(ref+alt))\""; } public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("AB", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Balance for hets (ref/(ref+alt))"); }
} }

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
@ -19,5 +20,5 @@ public class DepthOfCoverage extends StandardVariantAnnotation {
public String getKeyName() { return VCFRecord.DEPTH_KEY; } public String getKeyName() { return VCFRecord.DEPTH_KEY; }
public String getDescription() { return getKeyName() + ",1,Integer,\"Total Depth (including MQ0 reads)\""; } public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(getKeyName(), 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Total Depth (including MQ0 reads)"); }
} }

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
@ -21,7 +22,7 @@ public class HomopolymerRun extends StandardVariantAnnotation {
public String getKeyName() { return "HRun"; } public String getKeyName() { return "HRun"; }
public String getDescription() { return "HRun,1,Integer,\"Largest Contiguous Homopolymer Run of Variant Allele In Either Direction\""; } public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("HRun", 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Largest Contiguous Homopolymer Run of Variant Allele In Either Direction"); }
public boolean useZeroQualityReads() { return false; } public boolean useZeroQualityReads() { return false; }

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import java.util.Map; import java.util.Map;
@ -30,5 +31,5 @@ public class MismatchRate implements VariantAnnotation {
public String getKeyName() { return "MR"; } public String getKeyName() { return "MR"; }
public String getDescription() { return "MR,1,Float,\"Mismatch Rate of Reads Spanning This Position\""; } public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("MR", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Mismatch Rate of Reads Spanning This Position"); }
} }

View File

@ -7,6 +7,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
import java.util.ArrayList; import java.util.ArrayList;
@ -31,5 +32,5 @@ public class RMSMappingQuality extends StandardVariantAnnotation {
public String getKeyName() { return VCFRecord.RMS_MAPPING_QUALITY_KEY; } public String getKeyName() { return VCFRecord.RMS_MAPPING_QUALITY_KEY; }
public String getDescription() { return getKeyName() + ",1,Float,\"RMS Mapping Quality\""; } public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(getKeyName(), 1, VCFInfoHeaderLine.INFO_TYPE.Float, "RMS Mapping Quality"); }
} }

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.List; import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
@ -67,7 +68,7 @@ public class RankSumTest implements VariantAnnotation {
public String getKeyName() { return "RankSum"; } public String getKeyName() { return "RankSum"; }
public String getDescription() { return "RankSum,1,Float,\"Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities\""; } public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("RankSum", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities"); }
private void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) { private void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
for ( PileupElement p : pileup ) { for ( PileupElement p : pileup ) {

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
@ -26,7 +27,7 @@ public class SecondBaseSkew implements VariantAnnotation {
public String getKeyName() { return KEY_NAME; } public String getKeyName() { return KEY_NAME; }
public String getDescription() { return KEY_NAME + ",1,Float,\"Chi-square Secondary Base Skew\""; } public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(KEY_NAME, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Chi-square Secondary Base Skew"); }
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) { public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
if ( !variation.isBiallelic() || !variation.isSNP() ) if ( !variation.isBiallelic() || !variation.isSNP() )

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
@ -23,5 +24,5 @@ public class SpanningDeletions extends StandardVariantAnnotation {
public String getKeyName() { return "Dels"; } public String getKeyName() { return "Dels"; }
public String getDescription() { return "Dels,1,Float,\"Fraction of Reads Containing Spanning Deletions\""; } public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("Dels", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Fraction of Reads Containing Spanning Deletions"); }
} }

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
@ -16,6 +17,6 @@ public interface VariantAnnotation {
public String getKeyName(); public String getKeyName();
// return the description used for the VCF INFO meta field // return the description used for the VCF INFO meta field
public String getDescription(); public VCFInfoHeaderLine getDescription();
} }

View File

@ -116,10 +116,10 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
} }
// setup the header fields // setup the header fields
Set<String> hInfo = new HashSet<String>(); Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add("source=VariantAnnotator"); hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
hInfo.add("annotatorReference=" + getToolkit().getArguments().referenceFile.getName()); hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations)); hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations));
vcfHeader = new VCFHeader(hInfo, samples); vcfHeader = new VCFHeader(hInfo, samples);
@ -175,35 +175,35 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
} }
// option #1: don't specify annotations to be used: standard annotations are used by default // option #1: don't specify annotations to be used: standard annotations are used by default
public static Set<String> getVCFAnnotationDescriptions() { public static Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
if ( standardAnnotations == null ) if ( standardAnnotations == null )
determineAllAnnotations(); determineAllAnnotations();
TreeSet<String> descriptions = new TreeSet<String>(); TreeSet<VCFHeaderLine> descriptions = new TreeSet<VCFHeaderLine>();
for ( VariantAnnotation annotation : standardAnnotations.values() ) for ( VariantAnnotation annotation : standardAnnotations.values() )
descriptions.add("INFO=" + annotation.getDescription()); descriptions.add(annotation.getDescription());
return descriptions; return descriptions;
} }
// option #2: specify that all possible annotations be used // option #2: specify that all possible annotations be used
public static Set<String> getAllVCFAnnotationDescriptions() { public static Set<VCFHeaderLine> getAllVCFAnnotationDescriptions() {
if ( standardAnnotations == null ) if ( standardAnnotations == null )
determineAllAnnotations(); determineAllAnnotations();
TreeSet<String> descriptions = new TreeSet<String>(); TreeSet<VCFHeaderLine> descriptions = new TreeSet<VCFHeaderLine>();
for ( VariantAnnotation annotation : allAnnotations.values() ) for ( VariantAnnotation annotation : allAnnotations.values() )
descriptions.add("INFO=" + annotation.getDescription()); descriptions.add(annotation.getDescription());
return descriptions; return descriptions;
} }
// option #3: specify the exact annotations to be used // option #3: specify the exact annotations to be used
public static Set<String> getVCFAnnotationDescriptions(Collection<VariantAnnotation> annotations) { public static Set<VCFHeaderLine> getVCFAnnotationDescriptions(Collection<VariantAnnotation> annotations) {
TreeSet<String> descriptions = new TreeSet<String>(); TreeSet<VCFHeaderLine> descriptions = new TreeSet<VCFHeaderLine>();
for ( VariantAnnotation annotation : annotations ) for ( VariantAnnotation annotation : annotations )
descriptions.add("INFO=" + annotation.getDescription()); descriptions.add(annotation.getDescription());
return descriptions; return descriptions;
} }

View File

@ -103,21 +103,21 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
} }
// set up the header fields // set up the header fields
Set<String> hInfo = new HashSet<String>(); Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add("source=CallsetConcordance"); hInfo.add(new VCFHeaderLine("source", "CallsetConcordance"));
hInfo.add("note=\"This file represents a concordance test of various call sets - NOT the output from a multi-sample caller\""); hInfo.add(new VCFHeaderLine("note", "\"This file represents a concordance test of various call sets - NOT the output from a multi-sample caller\""));
hInfo.addAll(getVCFAnnotationDescriptions(requestedTypes)); hInfo.addAll(getVCFAnnotationDescriptions(requestedTypes));
VCFHeader header = new VCFHeader(hInfo, samples); VCFHeader header = new VCFHeader(hInfo, samples);
vcfWriter = new VCFWriter(header, OUTPUT); vcfWriter = new VCFWriter(header, OUTPUT);
} }
public static Set<String> getVCFAnnotationDescriptions(Collection<ConcordanceType> types) { public static Set<VCFHeaderLine> getVCFAnnotationDescriptions(Collection<ConcordanceType> types) {
TreeSet<String> descriptions = new TreeSet<String>(); TreeSet<VCFHeaderLine> descriptions = new TreeSet<VCFHeaderLine>();
for ( ConcordanceType type : types ) for ( ConcordanceType type : types )
descriptions.add("INFO=" + type.getInfoDescription()); descriptions.add(type.getInfoDescription());
return descriptions; return descriptions;
} }

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -11,5 +12,5 @@ public interface ConcordanceType {
public void initialize(Map<String,String> args, Set<String> samples); public void initialize(Map<String,String> args, Set<String> samples);
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref); public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref);
public String getInfoName(); public String getInfoName();
public String getInfoDescription(); public VCFInfoHeaderLine getInfoDescription();
} }

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.*; import java.util.*;
@ -102,5 +103,5 @@ public class IndelSubsets implements ConcordanceType {
} }
public String getInfoName() { return "IndelSubsets"; } public String getInfoName() { return "IndelSubsets"; }
public String getInfoDescription() { return getInfoName() + ",1,String,\"Indel-related subsets\""; } public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "Indel-related subsets"); }
} }

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.*; import java.util.*;
import java.util.Map.Entry; import java.util.Map.Entry;
@ -39,5 +40,5 @@ public class NWayVenn implements ConcordanceType {
} }
public String getInfoName() { return "NwayVenn"; } public String getInfoName() { return "NwayVenn"; }
public String getInfoDescription() { return getInfoName() + ",1,String,\"N-way Venn split\""; } public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "N-way Venn split"); }
} }

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.*; import java.util.*;
@ -111,5 +112,5 @@ public class SNPGenotypeConcordance implements ConcordanceType {
} }
public String getInfoName() { return "SnpConcordance"; } public String getInfoName() { return "SnpConcordance"; }
public String getInfoDescription() { return getInfoName() + ",1,String,\"SNP concordance test\""; } public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "SNP concordance test"); }
} }

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import java.util.*; import java.util.*;
@ -59,5 +60,5 @@ public class SimpleVenn implements ConcordanceType {
} }
public String getInfoName() { return "Venn"; } public String getInfoName() { return "Venn"; }
public String getInfoDescription() { return getInfoName() + ",1,String,\"2-way Venn split\""; } public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "2-way Venn split"); }
} }

View File

@ -47,18 +47,19 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
private void initializeVcfWriter(RodVCF rod) { private void initializeVcfWriter(RodVCF rod) {
// setup the header fields // setup the header fields
Set<String> hInfo = new HashSet<String>(); Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add("source=" + "VariantFiltration"); hInfo.add(new VCFHeaderLine("source", "VariantFiltration"));
hInfo.add("reference=" + getToolkit().getArguments().referenceFile.getName()); hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
if ( clusterWindow > 0 ) if ( clusterWindow > 0 )
hInfo.add("FILTER=" + CLUSTERED_SNP_FILTER_NAME + ",\"SNPs found in clusters\""); hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
if ( filterExpression != null ) if ( filterExpression != null )
hInfo.add("FILTER=" + FILTER_NAME + ",\"" + FILTER_STRING + "\""); hInfo.add(new VCFFilterHeaderLine(FILTER_NAME, FILTER_STRING));
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources(); List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) { for ( ReferenceOrderedDataSource source : dataSources ) {
if ( source.getReferenceOrderedData().getName().equals("mask") ) { if ( source.getReferenceOrderedData().getName().equals("mask") ) {
hInfo.add("FILTER=" + MASK_NAME + ",\"Overlaps a user-input mask\""); hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask"));
break; break;
} }
} }

View File

@ -37,6 +37,8 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.cmdLine.*; import org.broadinstitute.sting.utils.cmdLine.*;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMReadGroupRecord;
@ -139,7 +141,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
samples.clear(); samples.clear();
// get the optional header fields // get the optional header fields
Set<String> headerInfo = getHeaderInfo(); Set<VCFHeaderLine> headerInfo = getHeaderInfo();
// create the output writer stream // create the output writer stream
if ( VARIANTS_FILE != null ) if ( VARIANTS_FILE != null )
@ -154,16 +156,16 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
callsMetrics = new CallMetrics(); callsMetrics = new CallMetrics();
} }
private Set<String> getHeaderInfo() { private Set<VCFHeaderLine> getHeaderInfo() {
Set<String> headerInfo = new HashSet<String>(); Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// this is only applicable to VCF // this is only applicable to VCF
if ( UAC.VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF ) if ( UAC.VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF )
return headerInfo; return headerInfo;
// first, the basic info // first, the basic info
headerInfo.add("source=UnifiedGenotyper"); headerInfo.add(new VCFHeaderLine("source", "UnifiedGenotyper"));
headerInfo.add("reference=" + getToolkit().getArguments().referenceFile.getName()); headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
// annotation (INFO) fields from VariantAnnotator // annotation (INFO) fields from VariantAnnotator
if ( UAC.ALL_ANNOTATIONS ) if ( UAC.ALL_ANNOTATIONS )
@ -172,10 +174,10 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions()); headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
// annotation (INFO) fields from UnifiedGenotyper // annotation (INFO) fields from UnifiedGenotyper
headerInfo.add("INFO=AF,1,Float,\"Allele Frequency\""); headerInfo.add(new VCFInfoHeaderLine("AF", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
headerInfo.add("INFO=NS,1,Integer,\"Number of Samples With Data\""); headerInfo.add(new VCFInfoHeaderLine("NS", 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Number of Samples With Data"));
if ( !UAC.NO_SLOD ) if ( !UAC.NO_SLOD )
headerInfo.add("INFO=SB,1,Float,\"Strand Bias\""); headerInfo.add(new VCFInfoHeaderLine("SB", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
// FORMAT fields if not in POOLED mode // FORMAT fields if not in POOLED mode
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED )
@ -184,7 +186,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// all of the arguments from the argument collection // all of the arguments from the argument collection
Map<String,String> commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(Collections.<Object>singleton(UAC)); Map<String,String> commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(Collections.<Object>singleton(UAC));
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() ) for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() )
headerInfo.add(String.format("UG_%s=%s", commandLineArg.getKey(), commandLineArg.getValue())); headerInfo.add(new VCFHeaderLine(String.format("UG_%s", commandLineArg.getKey()), commandLineArg.getValue()));
return headerInfo; return headerInfo;
} }

View File

@ -59,9 +59,9 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
//Calendar cal = Calendar.getInstance(); //Calendar cal = Calendar.getInstance();
//metaData.put("fileDate", String.format("%d%02d%02d", cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DAY_OF_MONTH))); //metaData.put("fileDate", String.format("%d%02d%02d", cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DAY_OF_MONTH)));
Set<String> metaData = new HashSet<String>(); Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
metaData.add("source=VariantsToVCF"); metaData.add(new VCFHeaderLine("source", "VariantsToVCF"));
metaData.add("reference=" + args.referenceFile.getAbsolutePath()); metaData.add(new VCFHeaderLine("reference", args.referenceFile.getAbsolutePath()));
Set<String> additionalColumns = new HashSet<String>(); Set<String> additionalColumns = new HashSet<String>();
additionalColumns.add("FORMAT"); additionalColumns.add("FORMAT");

View File

@ -33,9 +33,9 @@ public class VCFSubsetWalker extends RefWalker<ArrayList<VCFRecord>, VCFWriter>
public void initializeWriter() { public void initializeWriter() {
Set<String> metaData = new HashSet<String>(); Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
metaData.add("source=VariantsToVCF"); metaData.add(new VCFHeaderLine("source", "VariantsToVCF"));
metaData.add("reference=" + this.getToolkit().getArguments().referenceFile.getAbsolutePath()); metaData.add(new VCFHeaderLine("reference", this.getToolkit().getArguments().referenceFile.getAbsolutePath()));
Set<String> additionalColumns = new HashSet<String>(); Set<String> additionalColumns = new HashSet<String>();
additionalColumns.add("FORMAT"); additionalColumns.add("FORMAT");

View File

@ -38,7 +38,7 @@ public class GenotypeWriterFactory {
SAMFileHeader header, SAMFileHeader header,
File destination, File destination,
Set<String> sampleNames, Set<String> sampleNames,
Set<String> headerInfo) { Set<VCFHeaderLine> headerInfo) {
switch (format) { switch (format) {
case GLF: case GLF:
return new GLFWriter(header.toString(), destination); return new GLFWriter(header.toString(), destination);
@ -57,7 +57,7 @@ public class GenotypeWriterFactory {
SAMFileHeader header, SAMFileHeader header,
PrintStream destination, PrintStream destination,
Set<String> sampleNames, Set<String> sampleNames,
Set<String> headerInfo) { Set<VCFHeaderLine> headerInfo) {
switch (format) { switch (format) {
case GELI: case GELI:
return new GeliTextWriter(destination); return new GeliTextWriter(destination);

View File

@ -0,0 +1,40 @@
package org.broadinstitute.sting.utils.genotype.vcf;
/**
* @author ebanks
* <p/>
* Class VCFFilterHeaderLine
* <p/>
* A class representing a key=value entry for FILTER fields in the VCF header
*/
public class VCFFilterHeaderLine extends VCFHeaderLine {
private String mName;
private String mDescription;
/**
* create a VCF filter header line
*
* @param name the name for this header line
* @param description the description for this header line
*/
public VCFFilterHeaderLine(String name, String description) {
super("FILTER", "");
mName = name;
mDescription = description;
}
protected String makeStringRep() {
return String.format("FILTER=%s,\"%s\"", mName, mDescription);
}
public boolean equals(Object o) {
if ( !(o instanceof VCFFilterHeaderLine) )
return false;
VCFFilterHeaderLine other = (VCFFilterHeaderLine)o;
return mName.equals(other.mName) &&
mDescription.equals(other.mDescription);
}
}

View File

@ -0,0 +1,53 @@
package org.broadinstitute.sting.utils.genotype.vcf;
/**
* @author ebanks
* <p/>
* Class VCFFormatHeaderLine
* <p/>
* A class representing a key=value entry for genotype FORMAT fields in the VCF header
*/
public class VCFFormatHeaderLine extends VCFHeaderLine {
// the info field types
public enum INFO_TYPE {
Integer, Float, String
}
private String mName;
private int mCount;
private String mDescription;
private INFO_TYPE mType;
/**
* create a VCF format header line
*
* @param name the name for this header line
* @param count the count for this header line
* @param type the type for this header line
* @param description the description for this header line
*/
public VCFFormatHeaderLine(String name, int count, INFO_TYPE type, String description) {
super("FORMAT", "");
mName = name;
mCount = count;
mType = type;
mDescription = description;
}
protected String makeStringRep() {
return String.format("FORMAT=%s,%d,%s,\"%s\"", mName, mCount, mType.toString(), mDescription);
}
public boolean equals(Object o) {
if ( !(o instanceof VCFFormatHeaderLine) )
return false;
VCFFormatHeaderLine other = (VCFFormatHeaderLine)o;
return mName.equals(other.mName) &&
mCount == other.mCount &&
mDescription.equals(other.mDescription) &&
mType == other.mType;
}
}

View File

@ -262,12 +262,12 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
return result; return result;
} }
public static Set<String> getSupportedHeaderStrings() { public static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
Set<String> result = new HashSet<String>(); Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
result.add("FORMAT=" + GENOTYPE_KEY + ",1,String,\"Genotype\""); result.add(new VCFFormatHeaderLine(GENOTYPE_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.String, "Genotype"));
result.add("FORMAT=" + GENOTYPE_QUALITY_KEY + ",1,Integer,\"Genotype Quality\""); result.add(new VCFFormatHeaderLine(GENOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Genotype Quality"));
result.add("FORMAT=" + DEPTH_KEY + ",1,Integer,\"Read Depth (without MQ0 reads)\""); result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Read Depth (without MQ0 reads)"));
//result.add("FORMAT=" + HAPLOTYPE_QUALITY_KEY + ",1,Integer,\"Haplotype Quality\""); //result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality"));
return result; return result;
} }
} }

View File

@ -25,7 +25,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class); protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class);
public VCFGenotypeWriterAdapter(File writeTo, Set<String> sampleNames, Set<String> headerInfo) { public VCFGenotypeWriterAdapter(File writeTo, Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
mSampleNames.addAll(sampleNames); mSampleNames.addAll(sampleNames);
initializeHeader(headerInfo); initializeHeader(headerInfo);
@ -34,7 +34,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
mWriter = new VCFWriter(mHeader, writeTo); mWriter = new VCFWriter(mHeader, writeTo);
} }
public VCFGenotypeWriterAdapter(OutputStream writeTo, Set<String> sampleNames, Set<String> headerInfo) { public VCFGenotypeWriterAdapter(OutputStream writeTo, Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
mSampleNames.addAll(sampleNames); mSampleNames.addAll(sampleNames);
initializeHeader(headerInfo); initializeHeader(headerInfo);
@ -48,11 +48,11 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
* *
* @param optionalHeaderInfo the optional header fields * @param optionalHeaderInfo the optional header fields
*/ */
private void initializeHeader(Set<String> optionalHeaderInfo) { private void initializeHeader(Set<VCFHeaderLine> optionalHeaderInfo) {
Set<String> hInfo = new TreeSet<String>(); Set<VCFHeaderLine> hInfo = new TreeSet<VCFHeaderLine>();
// setup the header fields // setup the header fields
hInfo.add(VCFHeader.FULL_FORMAT_LINE); hInfo.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
hInfo.addAll(optionalHeaderInfo); hInfo.addAll(optionalHeaderInfo);
// setup the sample names // setup the sample names

View File

@ -8,12 +8,12 @@ import java.util.*;
* <p/> * <p/>
* Class VCFHeader * Class VCFHeader
* <p/> * <p/>
* A descriptions should go here. Blame aaron if it's missing. * A class representing the VCF header
*/ */
public class VCFHeader { public class VCFHeader {
public static final String FILE_FORMAT_KEY = "fileformat="; public static final String FILE_FORMAT_KEY = "fileformat";
public static final String OLD_FILE_FORMAT_KEY = "format="; // from version 3.2 public static final String OLD_FILE_FORMAT_KEY = "format"; // from version 3.2
/** the current vcf version we support. */ /** the current vcf version we support. */
@ -22,7 +22,6 @@ public class VCFHeader {
public static final double VCF_VERSION_NUMBER = 3.3; public static final double VCF_VERSION_NUMBER = 3.3;
public static final String VCF_VERSION = VCF_VERSION_HEADER + VCF_VERSION_NUMBER; public static final String VCF_VERSION = VCF_VERSION_HEADER + VCF_VERSION_NUMBER;
public static final String FULL_FORMAT_LINE = FILE_FORMAT_KEY + VCF_VERSION;
// the mandatory header fields // the mandatory header fields
public enum HEADER_FIELDS { public enum HEADER_FIELDS {
@ -30,7 +29,7 @@ public class VCFHeader {
} }
// the associated meta data // the associated meta data
private final Set<String> mMetaData; private final Set<VCFHeaderLine> mMetaData;
// the list of auxiliary tags // the list of auxiliary tags
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>(); private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
@ -50,8 +49,8 @@ public class VCFHeader {
* *
* @param metaData the meta data associated with this header * @param metaData the meta data associated with this header
*/ */
public VCFHeader(Set<String> metaData) { public VCFHeader(Set<VCFHeaderLine> metaData) {
mMetaData = new TreeSet<String>(metaData); mMetaData = new TreeSet<VCFHeaderLine>(metaData);
checkVCFVersion(); checkVCFVersion();
} }
@ -61,8 +60,8 @@ public class VCFHeader {
* @param metaData the meta data associated with this header * @param metaData the meta data associated with this header
* @param genotypeSampleNames the genotype format field, and the sample names * @param genotypeSampleNames the genotype format field, and the sample names
*/ */
public VCFHeader(Set<String> metaData, Set<String> genotypeSampleNames) { public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
mMetaData = new TreeSet<String>(metaData); mMetaData = new TreeSet<VCFHeaderLine>(metaData);
for (String col : genotypeSampleNames) { for (String col : genotypeSampleNames) {
if (!col.equals("FORMAT")) if (!col.equals("FORMAT"))
mGenotypeSampleNames.add(col); mGenotypeSampleNames.add(col);
@ -77,19 +76,15 @@ public class VCFHeader {
*/ */
public void checkVCFVersion() { public void checkVCFVersion() {
String version = null; String version = null;
for ( String field : mMetaData ) { for ( VCFHeaderLine line : mMetaData ) {
if ( field.startsWith(FILE_FORMAT_KEY) ) { if ( line.getKey().equals(FILE_FORMAT_KEY) || line.getKey().equals(OLD_FILE_FORMAT_KEY) ) {
version = field.substring(FILE_FORMAT_KEY.length()); version = line.getValue();
break;
}
else if ( field.startsWith(OLD_FILE_FORMAT_KEY) ) {
version = field.substring(OLD_FILE_FORMAT_KEY.length());
break; break;
} }
} }
if ( version == null ) if ( version == null )
mMetaData.add(FULL_FORMAT_LINE); mMetaData.add(new VCFHeaderLine(FILE_FORMAT_KEY, VCF_VERSION));
else if ( !isSupportedVersion(version) ) else if ( !isSupportedVersion(version) )
throw new RuntimeException("VCF version " + version + throw new RuntimeException("VCF version " + version +
" is not yet supported; only version " + VCF_VERSION + " and earlier can be used"); " is not yet supported; only version " + VCF_VERSION + " and earlier can be used");
@ -124,7 +119,7 @@ public class VCFHeader {
* *
* @return a set of the meta data * @return a set of the meta data
*/ */
public Set<String> getMetaData() { public Set<VCFHeaderLine> getMetaData() {
return mMetaData; return mMetaData;
} }

View File

@ -0,0 +1,86 @@
package org.broadinstitute.sting.utils.genotype.vcf;
/**
* @author ebanks
* <p/>
* Class VCFHeaderLine
* <p/>
* A class representing a key=value entry in the VCF header
*/
public class VCFHeaderLine implements Comparable {
private String stringRep = null;
private String mKey = null;
private String mValue = null;
/**
* create a VCF header line
*
* @param key the key for this header line
* @param value the value for this header line
*/
public VCFHeaderLine(String key, String value) {
mKey = key;
mValue = value;
}
/**
* Get the key
*
* @return the key
*/
public String getKey() {
return mKey;
}
/**
* Set the key
*
* @param key the key for this header line
*/
public void setKey(String key) {
mKey = key;
stringRep = null;
}
/**
* Get the value
*
* @return the value
*/
public String getValue() {
return mValue;
}
/**
* Set the value
*
* @param value the value for this header line
*/
public void setValue(String value) {
mValue = value;
stringRep = null;
}
public String toString() {
if ( stringRep == null )
stringRep = makeStringRep();
return stringRep;
}
protected String makeStringRep() {
return mKey + "=" + mValue;
}
public boolean equals(Object o) {
if ( !(o instanceof VCFHeaderLine) )
return false;
return mKey.equals(((VCFHeaderLine)o).getKey()) && mValue.equals(((VCFHeaderLine)o).getValue());
}
public int compareTo(Object other) {
return toString().compareTo(other.toString());
}
}

View File

@ -0,0 +1,53 @@
package org.broadinstitute.sting.utils.genotype.vcf;
/**
* @author ebanks
* <p/>
* Class VCFInfoHeaderLine
* <p/>
* A class representing a key=value entry for INFO fields in the VCF header
*/
public class VCFInfoHeaderLine extends VCFHeaderLine {
// the info field types
public enum INFO_TYPE {
Integer, Float, String
}
private String mName;
private int mCount;
private String mDescription;
private INFO_TYPE mType;
/**
* create a VCF info header line
*
* @param name the name for this header line
* @param count the count for this header line
* @param type the type for this header line
* @param description the description for this header line
*/
public VCFInfoHeaderLine(String name, int count, INFO_TYPE type, String description) {
super("INFO", "");
mName = name;
mCount = count;
mType = type;
mDescription = description;
}
protected String makeStringRep() {
return String.format("INFO=%s,%d,%s,\"%s\"", mName, mCount, mType.toString(), mDescription);
}
public boolean equals(Object o) {
if ( !(o instanceof VCFInfoHeaderLine) )
return false;
VCFInfoHeaderLine other = (VCFInfoHeaderLine)o;
return mName.equals(other.mName) &&
mCount == other.mCount &&
mDescription.equals(other.mDescription) &&
mType == other.mType;
}
}

View File

@ -146,7 +146,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* @return a VCF Header created from the list of strings * @return a VCF Header created from the list of strings
*/ */
protected VCFHeader createHeader(List<String> headerStrings) { protected VCFHeader createHeader(List<String> headerStrings) {
Set<String> metaData = new TreeSet<String>(); Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
Set<String> auxTags = new LinkedHashSet<String>(); Set<String> auxTags = new LinkedHashSet<String>();
// iterate over all the passed in strings // iterate over all the passed in strings
for ( String str : headerStrings ) { for ( String str : headerStrings ) {
@ -169,7 +169,9 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
arrayIndex++; arrayIndex++;
} }
} else { } else {
metaData.add(str.substring(2)); int equals = str.indexOf("=");
if ( equals != -1 )
metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1)));
} }
} }

View File

@ -27,10 +27,10 @@ public class VCFUtils {
* *
* @return a set of all fields * @return a set of all fields
*/ */
public static Set<String> getHeaderFields(GenomeAnalysisEngine toolkit) { public static Set<VCFHeaderLine> getHeaderFields(GenomeAnalysisEngine toolkit) {
// keep a map of sample name to occurrences encountered // keep a map of sample name to occurrences encountered
TreeSet<String> fields = new TreeSet<String>(); TreeSet<VCFHeaderLine> fields = new TreeSet<VCFHeaderLine>();
// iterate to get all of the sample names // iterate to get all of the sample names
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources(); List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();

View File

@ -50,23 +50,21 @@ public class VCFWriter {
new OutputStreamWriter(location)); new OutputStreamWriter(location));
try { try {
// the fileformat field needs to be written first // the fileformat field needs to be written first
TreeSet<String> allMetaData = new TreeSet<String>(header.getMetaData()); TreeSet<VCFHeaderLine> nonFormatMetaData = new TreeSet<VCFHeaderLine>();
for ( String metadata : allMetaData ) { for ( VCFHeaderLine line : header.getMetaData() ) {
if ( metadata.startsWith(VCFHeader.FILE_FORMAT_KEY) ) { if ( line.getKey().equals(VCFHeader.FILE_FORMAT_KEY) ) {
mWriter.write(VCFHeader.METADATA_INDICATOR + metadata + "\n"); mWriter.write(VCFHeader.METADATA_INDICATOR + line.toString() + "\n");
break;
} }
else if ( metadata.startsWith(VCFHeader.OLD_FILE_FORMAT_KEY) ) { else if ( line.getKey().equals(VCFHeader.OLD_FILE_FORMAT_KEY) ) {
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeader.FILE_FORMAT_KEY + metadata.substring(VCFHeader.OLD_FILE_FORMAT_KEY.length()) + "\n"); mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeader.FILE_FORMAT_KEY + line.toString().substring(VCFHeader.OLD_FILE_FORMAT_KEY.length()) + "\n");
break; } else {
nonFormatMetaData.add(line);
} }
} }
// write the rest of the header meta-data out // write the rest of the header meta-data out
for ( String metadata : header.getMetaData() ) { for ( VCFHeaderLine line : nonFormatMetaData )
if ( !metadata.startsWith(VCFHeader.FILE_FORMAT_KEY) && !metadata.startsWith(VCFHeader.OLD_FILE_FORMAT_KEY) ) mWriter.write(VCFHeader.METADATA_INDICATOR + line + "\n");
mWriter.write(VCFHeader.METADATA_INDICATOR + metadata + "\n");
}
// write out the column line // write out the column line
StringBuilder b = new StringBuilder(); StringBuilder b = new StringBuilder();

View File

@ -17,8 +17,7 @@ import java.util.*;
*/ */
public class VCFHeaderTest extends BaseTest { public class VCFHeaderTest extends BaseTest {
private Set<VCFHeader.HEADER_FIELDS> headerFields = new LinkedHashSet<VCFHeader.HEADER_FIELDS>(); private Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
private Set<String> metaData = new HashSet();
private Set<String> additionalColumns = new HashSet<String>(); private Set<String> additionalColumns = new HashSet<String>();
/** /**
@ -26,8 +25,8 @@ public class VCFHeaderTest extends BaseTest {
*/ */
@Test @Test
public void testHeaderConstructor() { public void testHeaderConstructor() {
metaData.add(VCFHeader.FULL_FORMAT_LINE); // required metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
metaData.add("two=2"); metaData.add(new VCFHeaderLine("two", "2"));
additionalColumns.add("extra1"); additionalColumns.add("extra1");
additionalColumns.add("extra2"); additionalColumns.add("extra2");
// this should create a header that is valid // this should create a header that is valid

View File

@ -24,17 +24,16 @@ public class VCFReaderTest extends BaseTest {
private static final File complexFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/complexExample.vcf"); private static final File complexFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/complexExample.vcf");
private static final File headerNoRecordsFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/headerNoRecords.vcf"); private static final File headerNoRecordsFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/headerNoRecords.vcf");
private static IndexedFastaSequenceFile seq;
@BeforeClass @BeforeClass
public static void beforeTests() { public static void beforeTests() {
try { try {
seq = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta")); IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta"));
} catch (FileNotFoundException e) { GenomeLocParser.setupRefContigOrdering(seq);
throw new StingException("unable to load the sequence dictionary"); } catch (FileNotFoundException e) {
} throw new StingException("unable to load the sequence dictionary");
GenomeLocParser.setupRefContigOrdering(seq); }
} }
@Test @Test
public void testVCFInput() { public void testVCFInput() {
@ -330,7 +329,6 @@ public class VCFReaderTest extends BaseTest {
public void testHeaderNoRecords() { public void testHeaderNoRecords() {
VCFReader reader = new VCFReader(headerNoRecordsFile); VCFReader reader = new VCFReader(headerNoRecordsFile);
Assert.assertTrue(reader.getHeader().getMetaData() != null); Assert.assertTrue(reader.getHeader().getMetaData() != null);
Iterator<VCFRecord> iter = reader.iterator();
Assert.assertTrue(!reader.iterator().hasNext()); Assert.assertTrue(!reader.iterator().hasNext());
} }

View File

@ -22,21 +22,20 @@ import java.io.FileNotFoundException;
*/ */
public class VCFRecordTest extends BaseTest { public class VCFRecordTest extends BaseTest {
private static IndexedFastaSequenceFile seq;
@BeforeClass @BeforeClass
public static void beforeTests() { public static void beforeTests() {
try { try {
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
GenomeLocParser.setupRefContigOrdering(seq);
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new StingException("unable to load the sequence dictionary"); throw new StingException("unable to load the sequence dictionary");
} }
GenomeLocParser.setupRefContigOrdering(seq);
} }
/** /**
* create a fake VCF record * create a fake VCF record
* *
* @param infoFields the info fields
* @return a VCFRecord * @return a VCFRecord
*/ */
private static VCFRecord makeFakeVCFRecord(Map<String, String> infoFields) { private static VCFRecord makeFakeVCFRecord(Map<String, String> infoFields) {
@ -140,9 +139,9 @@ public class VCFRecordTest extends BaseTest {
* @return a fake VCF header * @return a fake VCF header
*/ */
public static VCFHeader createFakeHeader() { public static VCFHeader createFakeHeader() {
Set<String> metaData = new HashSet(); Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
metaData.add(VCFHeader.FULL_FORMAT_LINE); // required metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
metaData.add("two=2"); metaData.add(new VCFHeaderLine("two", "2"));
Set<String> additionalColumns = new HashSet<String>(); Set<String> additionalColumns = new HashSet<String>();
additionalColumns.add("FORMAT"); additionalColumns.add("FORMAT");
additionalColumns.add("sample1"); additionalColumns.add("sample1");
@ -158,8 +157,6 @@ public class VCFRecordTest extends BaseTest {
Map<String, String> infoFields = new HashMap<String, String>(); Map<String, String> infoFields = new HashMap<String, String>();
infoFields.put("DP", "50"); infoFields.put("DP", "50");
VCFRecord rec = makeFakeVCFRecord(infoFields); VCFRecord rec = makeFakeVCFRecord(infoFields);
Map<String, String> metaData = new HashMap<String, String>();
List<String> additionalColumns = new ArrayList<String>();
String rep = rec.toStringEncoding(createFakeHeader()); String rep = rec.toStringEncoding(createFakeHeader());
Assert.assertTrue(stringRep.equals(rep)); Assert.assertTrue(stringRep.equals(rep));
rec.addInfoField("AB", "CD"); rec.addInfoField("AB", "CD");

View File

@ -21,21 +21,18 @@ import java.util.*;
* This class tests out the ability of the VCF writer to correctly write VCF files * This class tests out the ability of the VCF writer to correctly write VCF files
*/ */
public class VCFWriterTest extends BaseTest { public class VCFWriterTest extends BaseTest {
private Set<VCFHeader.HEADER_FIELDS> headerFields = new LinkedHashSet<VCFHeader.HEADER_FIELDS>(); private Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
private Set<String> metaData = new HashSet();
private Set<String> additionalColumns = new HashSet<String>(); private Set<String> additionalColumns = new HashSet<String>();
private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf"); private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf");
private static IndexedFastaSequenceFile seq;
@BeforeClass @BeforeClass
public static void beforeTests() { public static void beforeTests() {
try { try {
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
GenomeLocParser.setupRefContigOrdering(seq);
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new StingException("unable to load the sequence dictionary"); throw new StingException("unable to load the sequence dictionary");
} }
GenomeLocParser.setupRefContigOrdering(seq);
} }
/** test, using the writer and reader, that we can output and input a VCF file without problems */ /** test, using the writer and reader, that we can output and input a VCF file without problems */
@ -50,7 +47,7 @@ public class VCFWriterTest extends BaseTest {
int counter = 0; int counter = 0;
// validate what we're reading in // validate what we're reading in
validateHeader(reader.getHeader()); validateHeader(reader.getHeader());
for(VCFRecord rec :reader) { for (VCFRecord rec : reader) {
counter++; counter++;
} }
Assert.assertEquals(2,counter); Assert.assertEquals(2,counter);
@ -60,11 +57,13 @@ public class VCFWriterTest extends BaseTest {
/** /**
* create a fake header of known quantity * create a fake header of known quantity
* @param metaData the header lines
* @param additionalColumns the additional column names
* @return a fake VCF header * @return a fake VCF header
*/ */
public static VCFHeader createFakeHeader(Set<String> metaData, Set<String> additionalColumns) { public static VCFHeader createFakeHeader(Set<VCFHeaderLine> metaData, Set<String> additionalColumns) {
metaData.add(VCFHeader.FULL_FORMAT_LINE); // required metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
metaData.add("two=2"); metaData.add(new VCFHeaderLine("two", "2"));
additionalColumns.add("FORMAT"); additionalColumns.add("FORMAT");
additionalColumns.add("extra1"); additionalColumns.add("extra1");
additionalColumns.add("extra2"); additionalColumns.add("extra2");