Refactored and generalized the VCF header info code.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2346 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
05b8782d5f
commit
97618663ef
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
|
@ -61,5 +62,5 @@ public class AlleleBalance extends StandardVariantAnnotation {
|
|||
|
||||
public String getKeyName() { return "AB"; }
|
||||
|
||||
public String getDescription() { return "AB,1,Float,\"Allele Balance for hets (ref/(ref+alt))\""; }
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("AB", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Balance for hets (ref/(ref+alt))"); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -19,5 +20,5 @@ public class DepthOfCoverage extends StandardVariantAnnotation {
|
|||
|
||||
public String getKeyName() { return VCFRecord.DEPTH_KEY; }
|
||||
|
||||
public String getDescription() { return getKeyName() + ",1,Integer,\"Total Depth (including MQ0 reads)\""; }
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(getKeyName(), 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Total Depth (including MQ0 reads)"); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -21,7 +22,7 @@ public class HomopolymerRun extends StandardVariantAnnotation {
|
|||
|
||||
public String getKeyName() { return "HRun"; }
|
||||
|
||||
public String getDescription() { return "HRun,1,Integer,\"Largest Contiguous Homopolymer Run of Variant Allele In Either Direction\""; }
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("HRun", 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Largest Contiguous Homopolymer Run of Variant Allele In Either Direction"); }
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
import java.util.Map;
|
||||
|
|
@ -30,5 +31,5 @@ public class MismatchRate implements VariantAnnotation {
|
|||
|
||||
public String getKeyName() { return "MR"; }
|
||||
|
||||
public String getDescription() { return "MR,1,Float,\"Mismatch Rate of Reads Spanning This Position\""; }
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("MR", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Mismatch Rate of Reads Spanning This Position"); }
|
||||
}
|
||||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -31,5 +32,5 @@ public class RMSMappingQuality extends StandardVariantAnnotation {
|
|||
|
||||
public String getKeyName() { return VCFRecord.RMS_MAPPING_QUALITY_KEY; }
|
||||
|
||||
public String getDescription() { return getKeyName() + ",1,Float,\"RMS Mapping Quality\""; }
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(getKeyName(), 1, VCFInfoHeaderLine.INFO_TYPE.Float, "RMS Mapping Quality"); }
|
||||
}
|
||||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.*;
|
|||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -67,7 +68,7 @@ public class RankSumTest implements VariantAnnotation {
|
|||
|
||||
public String getKeyName() { return "RankSum"; }
|
||||
|
||||
public String getDescription() { return "RankSum,1,Float,\"Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities\""; }
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("RankSum", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities"); }
|
||||
|
||||
private void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
|
||||
for ( PileupElement p : pileup ) {
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
|
||||
|
|
@ -26,7 +27,7 @@ public class SecondBaseSkew implements VariantAnnotation {
|
|||
|
||||
public String getKeyName() { return KEY_NAME; }
|
||||
|
||||
public String getDescription() { return KEY_NAME + ",1,Float,\"Chi-square Secondary Base Skew\""; }
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(KEY_NAME, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Chi-square Secondary Base Skew"); }
|
||||
|
||||
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
|
||||
if ( !variation.isBiallelic() || !variation.isSNP() )
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -23,5 +24,5 @@ public class SpanningDeletions extends StandardVariantAnnotation {
|
|||
|
||||
public String getKeyName() { return "Dels"; }
|
||||
|
||||
public String getDescription() { return "Dels,1,Float,\"Fraction of Reads Containing Spanning Deletions\""; }
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("Dels", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Fraction of Reads Containing Spanning Deletions"); }
|
||||
}
|
||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -16,6 +17,6 @@ public interface VariantAnnotation {
|
|||
public String getKeyName();
|
||||
|
||||
// return the description used for the VCF INFO meta field
|
||||
public String getDescription();
|
||||
public VCFInfoHeaderLine getDescription();
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -116,10 +116,10 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// setup the header fields
|
||||
Set<String> hInfo = new HashSet<String>();
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add("source=VariantAnnotator");
|
||||
hInfo.add("annotatorReference=" + getToolkit().getArguments().referenceFile.getName());
|
||||
hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
|
||||
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
|
||||
hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations));
|
||||
|
||||
vcfHeader = new VCFHeader(hInfo, samples);
|
||||
|
|
@ -175,35 +175,35 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// option #1: don't specify annotations to be used: standard annotations are used by default
|
||||
public static Set<String> getVCFAnnotationDescriptions() {
|
||||
public static Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
|
||||
if ( standardAnnotations == null )
|
||||
determineAllAnnotations();
|
||||
|
||||
TreeSet<String> descriptions = new TreeSet<String>();
|
||||
TreeSet<VCFHeaderLine> descriptions = new TreeSet<VCFHeaderLine>();
|
||||
for ( VariantAnnotation annotation : standardAnnotations.values() )
|
||||
descriptions.add("INFO=" + annotation.getDescription());
|
||||
descriptions.add(annotation.getDescription());
|
||||
|
||||
return descriptions;
|
||||
}
|
||||
|
||||
// option #2: specify that all possible annotations be used
|
||||
public static Set<String> getAllVCFAnnotationDescriptions() {
|
||||
public static Set<VCFHeaderLine> getAllVCFAnnotationDescriptions() {
|
||||
if ( standardAnnotations == null )
|
||||
determineAllAnnotations();
|
||||
|
||||
TreeSet<String> descriptions = new TreeSet<String>();
|
||||
TreeSet<VCFHeaderLine> descriptions = new TreeSet<VCFHeaderLine>();
|
||||
for ( VariantAnnotation annotation : allAnnotations.values() )
|
||||
descriptions.add("INFO=" + annotation.getDescription());
|
||||
descriptions.add(annotation.getDescription());
|
||||
|
||||
return descriptions;
|
||||
}
|
||||
|
||||
// option #3: specify the exact annotations to be used
|
||||
public static Set<String> getVCFAnnotationDescriptions(Collection<VariantAnnotation> annotations) {
|
||||
public static Set<VCFHeaderLine> getVCFAnnotationDescriptions(Collection<VariantAnnotation> annotations) {
|
||||
|
||||
TreeSet<String> descriptions = new TreeSet<String>();
|
||||
TreeSet<VCFHeaderLine> descriptions = new TreeSet<VCFHeaderLine>();
|
||||
for ( VariantAnnotation annotation : annotations )
|
||||
descriptions.add("INFO=" + annotation.getDescription());
|
||||
descriptions.add(annotation.getDescription());
|
||||
|
||||
return descriptions;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -103,21 +103,21 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// set up the header fields
|
||||
Set<String> hInfo = new HashSet<String>();
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add("source=CallsetConcordance");
|
||||
hInfo.add("note=\"This file represents a concordance test of various call sets - NOT the output from a multi-sample caller\"");
|
||||
hInfo.add(new VCFHeaderLine("source", "CallsetConcordance"));
|
||||
hInfo.add(new VCFHeaderLine("note", "\"This file represents a concordance test of various call sets - NOT the output from a multi-sample caller\""));
|
||||
hInfo.addAll(getVCFAnnotationDescriptions(requestedTypes));
|
||||
VCFHeader header = new VCFHeader(hInfo, samples);
|
||||
|
||||
vcfWriter = new VCFWriter(header, OUTPUT);
|
||||
}
|
||||
|
||||
public static Set<String> getVCFAnnotationDescriptions(Collection<ConcordanceType> types) {
|
||||
public static Set<VCFHeaderLine> getVCFAnnotationDescriptions(Collection<ConcordanceType> types) {
|
||||
|
||||
TreeSet<String> descriptions = new TreeSet<String>();
|
||||
TreeSet<VCFHeaderLine> descriptions = new TreeSet<VCFHeaderLine>();
|
||||
for ( ConcordanceType type : types )
|
||||
descriptions.add("INFO=" + type.getInfoDescription());
|
||||
descriptions.add(type.getInfoDescription());
|
||||
|
||||
return descriptions;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
|
@ -11,5 +12,5 @@ public interface ConcordanceType {
|
|||
public void initialize(Map<String,String> args, Set<String> samples);
|
||||
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref);
|
||||
public String getInfoName();
|
||||
public String getInfoDescription();
|
||||
public VCFInfoHeaderLine getInfoDescription();
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -102,5 +103,5 @@ public class IndelSubsets implements ConcordanceType {
|
|||
}
|
||||
|
||||
public String getInfoName() { return "IndelSubsets"; }
|
||||
public String getInfoDescription() { return getInfoName() + ",1,String,\"Indel-related subsets\""; }
|
||||
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "Indel-related subsets"); }
|
||||
}
|
||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.Map.Entry;
|
||||
|
|
@ -39,5 +40,5 @@ public class NWayVenn implements ConcordanceType {
|
|||
}
|
||||
|
||||
public String getInfoName() { return "NwayVenn"; }
|
||||
public String getInfoDescription() { return getInfoName() + ",1,String,\"N-way Venn split\""; }
|
||||
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "N-way Venn split"); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -111,5 +112,5 @@ public class SNPGenotypeConcordance implements ConcordanceType {
|
|||
}
|
||||
|
||||
public String getInfoName() { return "SnpConcordance"; }
|
||||
public String getInfoDescription() { return getInfoName() + ",1,String,\"SNP concordance test\""; }
|
||||
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "SNP concordance test"); }
|
||||
}
|
||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -59,5 +60,5 @@ public class SimpleVenn implements ConcordanceType {
|
|||
}
|
||||
|
||||
public String getInfoName() { return "Venn"; }
|
||||
public String getInfoDescription() { return getInfoName() + ",1,String,\"2-way Venn split\""; }
|
||||
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "2-way Venn split"); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,18 +47,19 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
private void initializeVcfWriter(RodVCF rod) {
|
||||
// setup the header fields
|
||||
Set<String> hInfo = new HashSet<String>();
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add("source=" + "VariantFiltration");
|
||||
hInfo.add("reference=" + getToolkit().getArguments().referenceFile.getName());
|
||||
hInfo.add(new VCFHeaderLine("source", "VariantFiltration"));
|
||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
|
||||
if ( clusterWindow > 0 )
|
||||
hInfo.add("FILTER=" + CLUSTERED_SNP_FILTER_NAME + ",\"SNPs found in clusters\"");
|
||||
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
|
||||
if ( filterExpression != null )
|
||||
hInfo.add("FILTER=" + FILTER_NAME + ",\"" + FILTER_STRING + "\"");
|
||||
hInfo.add(new VCFFilterHeaderLine(FILTER_NAME, FILTER_STRING));
|
||||
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
if ( source.getReferenceOrderedData().getName().equals("mask") ) {
|
||||
hInfo.add("FILTER=" + MASK_NAME + ",\"Overlaps a user-input mask\"");
|
||||
hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask"));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,6 +37,8 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
import org.broadinstitute.sting.utils.cmdLine.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
|
|
@ -139,7 +141,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
samples.clear();
|
||||
|
||||
// get the optional header fields
|
||||
Set<String> headerInfo = getHeaderInfo();
|
||||
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
|
||||
|
||||
// create the output writer stream
|
||||
if ( VARIANTS_FILE != null )
|
||||
|
|
@ -154,16 +156,16 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
callsMetrics = new CallMetrics();
|
||||
}
|
||||
|
||||
private Set<String> getHeaderInfo() {
|
||||
Set<String> headerInfo = new HashSet<String>();
|
||||
private Set<VCFHeaderLine> getHeaderInfo() {
|
||||
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// this is only applicable to VCF
|
||||
if ( UAC.VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF )
|
||||
return headerInfo;
|
||||
|
||||
// first, the basic info
|
||||
headerInfo.add("source=UnifiedGenotyper");
|
||||
headerInfo.add("reference=" + getToolkit().getArguments().referenceFile.getName());
|
||||
headerInfo.add(new VCFHeaderLine("source", "UnifiedGenotyper"));
|
||||
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
|
||||
// annotation (INFO) fields from VariantAnnotator
|
||||
if ( UAC.ALL_ANNOTATIONS )
|
||||
|
|
@ -172,10 +174,10 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
|
||||
|
||||
// annotation (INFO) fields from UnifiedGenotyper
|
||||
headerInfo.add("INFO=AF,1,Float,\"Allele Frequency\"");
|
||||
headerInfo.add("INFO=NS,1,Integer,\"Number of Samples With Data\"");
|
||||
headerInfo.add(new VCFInfoHeaderLine("AF", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
|
||||
headerInfo.add(new VCFInfoHeaderLine("NS", 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Number of Samples With Data"));
|
||||
if ( !UAC.NO_SLOD )
|
||||
headerInfo.add("INFO=SB,1,Float,\"Strand Bias\"");
|
||||
headerInfo.add(new VCFInfoHeaderLine("SB", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
|
||||
|
||||
// FORMAT fields if not in POOLED mode
|
||||
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED )
|
||||
|
|
@ -184,7 +186,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
// all of the arguments from the argument collection
|
||||
Map<String,String> commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(Collections.<Object>singleton(UAC));
|
||||
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() )
|
||||
headerInfo.add(String.format("UG_%s=%s", commandLineArg.getKey(), commandLineArg.getValue()));
|
||||
headerInfo.add(new VCFHeaderLine(String.format("UG_%s", commandLineArg.getKey()), commandLineArg.getValue()));
|
||||
|
||||
return headerInfo;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,9 +59,9 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
//Calendar cal = Calendar.getInstance();
|
||||
//metaData.put("fileDate", String.format("%d%02d%02d", cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DAY_OF_MONTH)));
|
||||
|
||||
Set<String> metaData = new HashSet<String>();
|
||||
metaData.add("source=VariantsToVCF");
|
||||
metaData.add("reference=" + args.referenceFile.getAbsolutePath());
|
||||
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||
metaData.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
||||
metaData.add(new VCFHeaderLine("reference", args.referenceFile.getAbsolutePath()));
|
||||
|
||||
Set<String> additionalColumns = new HashSet<String>();
|
||||
additionalColumns.add("FORMAT");
|
||||
|
|
|
|||
|
|
@ -33,9 +33,9 @@ public class VCFSubsetWalker extends RefWalker<ArrayList<VCFRecord>, VCFWriter>
|
|||
|
||||
public void initializeWriter() {
|
||||
|
||||
Set<String> metaData = new HashSet<String>();
|
||||
metaData.add("source=VariantsToVCF");
|
||||
metaData.add("reference=" + this.getToolkit().getArguments().referenceFile.getAbsolutePath());
|
||||
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||
metaData.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
||||
metaData.add(new VCFHeaderLine("reference", this.getToolkit().getArguments().referenceFile.getAbsolutePath()));
|
||||
|
||||
Set<String> additionalColumns = new HashSet<String>();
|
||||
additionalColumns.add("FORMAT");
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ public class GenotypeWriterFactory {
|
|||
SAMFileHeader header,
|
||||
File destination,
|
||||
Set<String> sampleNames,
|
||||
Set<String> headerInfo) {
|
||||
Set<VCFHeaderLine> headerInfo) {
|
||||
switch (format) {
|
||||
case GLF:
|
||||
return new GLFWriter(header.toString(), destination);
|
||||
|
|
@ -57,7 +57,7 @@ public class GenotypeWriterFactory {
|
|||
SAMFileHeader header,
|
||||
PrintStream destination,
|
||||
Set<String> sampleNames,
|
||||
Set<String> headerInfo) {
|
||||
Set<VCFHeaderLine> headerInfo) {
|
||||
switch (format) {
|
||||
case GELI:
|
||||
return new GeliTextWriter(destination);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,40 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFFilterHeaderLine
|
||||
* <p/>
|
||||
* A class representing a key=value entry for FILTER fields in the VCF header
|
||||
*/
|
||||
public class VCFFilterHeaderLine extends VCFHeaderLine {
|
||||
|
||||
private String mName;
|
||||
private String mDescription;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF filter header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param description the description for this header line
|
||||
*/
|
||||
public VCFFilterHeaderLine(String name, String description) {
|
||||
super("FILTER", "");
|
||||
mName = name;
|
||||
mDescription = description;
|
||||
}
|
||||
|
||||
protected String makeStringRep() {
|
||||
return String.format("FILTER=%s,\"%s\"", mName, mDescription);
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFFilterHeaderLine) )
|
||||
return false;
|
||||
VCFFilterHeaderLine other = (VCFFilterHeaderLine)o;
|
||||
return mName.equals(other.mName) &&
|
||||
mDescription.equals(other.mDescription);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFFormatHeaderLine
|
||||
* <p/>
|
||||
* A class representing a key=value entry for genotype FORMAT fields in the VCF header
|
||||
*/
|
||||
public class VCFFormatHeaderLine extends VCFHeaderLine {
|
||||
|
||||
// the info field types
|
||||
public enum INFO_TYPE {
|
||||
Integer, Float, String
|
||||
}
|
||||
|
||||
private String mName;
|
||||
private int mCount;
|
||||
private String mDescription;
|
||||
private INFO_TYPE mType;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF format header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param count the count for this header line
|
||||
* @param type the type for this header line
|
||||
* @param description the description for this header line
|
||||
*/
|
||||
public VCFFormatHeaderLine(String name, int count, INFO_TYPE type, String description) {
|
||||
super("FORMAT", "");
|
||||
mName = name;
|
||||
mCount = count;
|
||||
mType = type;
|
||||
mDescription = description;
|
||||
}
|
||||
|
||||
protected String makeStringRep() {
|
||||
return String.format("FORMAT=%s,%d,%s,\"%s\"", mName, mCount, mType.toString(), mDescription);
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFFormatHeaderLine) )
|
||||
return false;
|
||||
VCFFormatHeaderLine other = (VCFFormatHeaderLine)o;
|
||||
return mName.equals(other.mName) &&
|
||||
mCount == other.mCount &&
|
||||
mDescription.equals(other.mDescription) &&
|
||||
mType == other.mType;
|
||||
}
|
||||
}
|
||||
|
|
@ -262,12 +262,12 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
|
|||
return result;
|
||||
}
|
||||
|
||||
public static Set<String> getSupportedHeaderStrings() {
|
||||
Set<String> result = new HashSet<String>();
|
||||
result.add("FORMAT=" + GENOTYPE_KEY + ",1,String,\"Genotype\"");
|
||||
result.add("FORMAT=" + GENOTYPE_QUALITY_KEY + ",1,Integer,\"Genotype Quality\"");
|
||||
result.add("FORMAT=" + DEPTH_KEY + ",1,Integer,\"Read Depth (without MQ0 reads)\"");
|
||||
//result.add("FORMAT=" + HAPLOTYPE_QUALITY_KEY + ",1,Integer,\"Haplotype Quality\"");
|
||||
public static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
|
||||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(GENOTYPE_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(GENOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Read Depth (without MQ0 reads)"));
|
||||
//result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality"));
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class);
|
||||
|
||||
|
||||
public VCFGenotypeWriterAdapter(File writeTo, Set<String> sampleNames, Set<String> headerInfo) {
|
||||
public VCFGenotypeWriterAdapter(File writeTo, Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
|
||||
mSampleNames.addAll(sampleNames);
|
||||
|
||||
initializeHeader(headerInfo);
|
||||
|
|
@ -34,7 +34,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
mWriter = new VCFWriter(mHeader, writeTo);
|
||||
}
|
||||
|
||||
public VCFGenotypeWriterAdapter(OutputStream writeTo, Set<String> sampleNames, Set<String> headerInfo) {
|
||||
public VCFGenotypeWriterAdapter(OutputStream writeTo, Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
|
||||
mSampleNames.addAll(sampleNames);
|
||||
|
||||
initializeHeader(headerInfo);
|
||||
|
|
@ -48,11 +48,11 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
*
|
||||
* @param optionalHeaderInfo the optional header fields
|
||||
*/
|
||||
private void initializeHeader(Set<String> optionalHeaderInfo) {
|
||||
Set<String> hInfo = new TreeSet<String>();
|
||||
private void initializeHeader(Set<VCFHeaderLine> optionalHeaderInfo) {
|
||||
Set<VCFHeaderLine> hInfo = new TreeSet<VCFHeaderLine>();
|
||||
|
||||
// setup the header fields
|
||||
hInfo.add(VCFHeader.FULL_FORMAT_LINE);
|
||||
hInfo.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
|
||||
hInfo.addAll(optionalHeaderInfo);
|
||||
|
||||
// setup the sample names
|
||||
|
|
|
|||
|
|
@ -8,12 +8,12 @@ import java.util.*;
|
|||
* <p/>
|
||||
* Class VCFHeader
|
||||
* <p/>
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
* A class representing the VCF header
|
||||
*/
|
||||
public class VCFHeader {
|
||||
|
||||
public static final String FILE_FORMAT_KEY = "fileformat=";
|
||||
public static final String OLD_FILE_FORMAT_KEY = "format="; // from version 3.2
|
||||
public static final String FILE_FORMAT_KEY = "fileformat";
|
||||
public static final String OLD_FILE_FORMAT_KEY = "format"; // from version 3.2
|
||||
|
||||
|
||||
/** the current vcf version we support. */
|
||||
|
|
@ -22,7 +22,6 @@ public class VCFHeader {
|
|||
public static final double VCF_VERSION_NUMBER = 3.3;
|
||||
public static final String VCF_VERSION = VCF_VERSION_HEADER + VCF_VERSION_NUMBER;
|
||||
|
||||
public static final String FULL_FORMAT_LINE = FILE_FORMAT_KEY + VCF_VERSION;
|
||||
|
||||
// the mandatory header fields
|
||||
public enum HEADER_FIELDS {
|
||||
|
|
@ -30,7 +29,7 @@ public class VCFHeader {
|
|||
}
|
||||
|
||||
// the associated meta data
|
||||
private final Set<String> mMetaData;
|
||||
private final Set<VCFHeaderLine> mMetaData;
|
||||
|
||||
// the list of auxiliary tags
|
||||
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
|
||||
|
|
@ -50,8 +49,8 @@ public class VCFHeader {
|
|||
*
|
||||
* @param metaData the meta data associated with this header
|
||||
*/
|
||||
public VCFHeader(Set<String> metaData) {
|
||||
mMetaData = new TreeSet<String>(metaData);
|
||||
public VCFHeader(Set<VCFHeaderLine> metaData) {
|
||||
mMetaData = new TreeSet<VCFHeaderLine>(metaData);
|
||||
checkVCFVersion();
|
||||
}
|
||||
|
||||
|
|
@ -61,8 +60,8 @@ public class VCFHeader {
|
|||
* @param metaData the meta data associated with this header
|
||||
* @param genotypeSampleNames the genotype format field, and the sample names
|
||||
*/
|
||||
public VCFHeader(Set<String> metaData, Set<String> genotypeSampleNames) {
|
||||
mMetaData = new TreeSet<String>(metaData);
|
||||
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
|
||||
mMetaData = new TreeSet<VCFHeaderLine>(metaData);
|
||||
for (String col : genotypeSampleNames) {
|
||||
if (!col.equals("FORMAT"))
|
||||
mGenotypeSampleNames.add(col);
|
||||
|
|
@ -77,19 +76,15 @@ public class VCFHeader {
|
|||
*/
|
||||
public void checkVCFVersion() {
|
||||
String version = null;
|
||||
for ( String field : mMetaData ) {
|
||||
if ( field.startsWith(FILE_FORMAT_KEY) ) {
|
||||
version = field.substring(FILE_FORMAT_KEY.length());
|
||||
break;
|
||||
}
|
||||
else if ( field.startsWith(OLD_FILE_FORMAT_KEY) ) {
|
||||
version = field.substring(OLD_FILE_FORMAT_KEY.length());
|
||||
for ( VCFHeaderLine line : mMetaData ) {
|
||||
if ( line.getKey().equals(FILE_FORMAT_KEY) || line.getKey().equals(OLD_FILE_FORMAT_KEY) ) {
|
||||
version = line.getValue();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( version == null )
|
||||
mMetaData.add(FULL_FORMAT_LINE);
|
||||
mMetaData.add(new VCFHeaderLine(FILE_FORMAT_KEY, VCF_VERSION));
|
||||
else if ( !isSupportedVersion(version) )
|
||||
throw new RuntimeException("VCF version " + version +
|
||||
" is not yet supported; only version " + VCF_VERSION + " and earlier can be used");
|
||||
|
|
@ -124,7 +119,7 @@ public class VCFHeader {
|
|||
*
|
||||
* @return a set of the meta data
|
||||
*/
|
||||
public Set<String> getMetaData() {
|
||||
public Set<VCFHeaderLine> getMetaData() {
|
||||
return mMetaData;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,86 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFHeaderLine
|
||||
* <p/>
|
||||
* A class representing a key=value entry in the VCF header
|
||||
*/
|
||||
public class VCFHeaderLine implements Comparable {
|
||||
|
||||
private String stringRep = null;
|
||||
private String mKey = null;
|
||||
private String mValue = null;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF header line
|
||||
*
|
||||
* @param key the key for this header line
|
||||
* @param value the value for this header line
|
||||
*/
|
||||
public VCFHeaderLine(String key, String value) {
|
||||
mKey = key;
|
||||
mValue = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the key
|
||||
*
|
||||
* @return the key
|
||||
*/
|
||||
public String getKey() {
|
||||
return mKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the key
|
||||
*
|
||||
* @param key the key for this header line
|
||||
*/
|
||||
public void setKey(String key) {
|
||||
mKey = key;
|
||||
stringRep = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the value
|
||||
*
|
||||
* @return the value
|
||||
*/
|
||||
public String getValue() {
|
||||
return mValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value
|
||||
*
|
||||
* @param value the value for this header line
|
||||
*/
|
||||
public void setValue(String value) {
|
||||
mValue = value;
|
||||
stringRep = null;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
if ( stringRep == null )
|
||||
stringRep = makeStringRep();
|
||||
return stringRep;
|
||||
}
|
||||
|
||||
protected String makeStringRep() {
|
||||
return mKey + "=" + mValue;
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFHeaderLine) )
|
||||
return false;
|
||||
return mKey.equals(((VCFHeaderLine)o).getKey()) && mValue.equals(((VCFHeaderLine)o).getValue());
|
||||
}
|
||||
|
||||
public int compareTo(Object other) {
|
||||
return toString().compareTo(other.toString());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFInfoHeaderLine
|
||||
* <p/>
|
||||
* A class representing a key=value entry for INFO fields in the VCF header
|
||||
*/
|
||||
public class VCFInfoHeaderLine extends VCFHeaderLine {
|
||||
|
||||
// the info field types
|
||||
public enum INFO_TYPE {
|
||||
Integer, Float, String
|
||||
}
|
||||
|
||||
private String mName;
|
||||
private int mCount;
|
||||
private String mDescription;
|
||||
private INFO_TYPE mType;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF info header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param count the count for this header line
|
||||
* @param type the type for this header line
|
||||
* @param description the description for this header line
|
||||
*/
|
||||
public VCFInfoHeaderLine(String name, int count, INFO_TYPE type, String description) {
|
||||
super("INFO", "");
|
||||
mName = name;
|
||||
mCount = count;
|
||||
mType = type;
|
||||
mDescription = description;
|
||||
}
|
||||
|
||||
protected String makeStringRep() {
|
||||
return String.format("INFO=%s,%d,%s,\"%s\"", mName, mCount, mType.toString(), mDescription);
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFInfoHeaderLine) )
|
||||
return false;
|
||||
VCFInfoHeaderLine other = (VCFInfoHeaderLine)o;
|
||||
return mName.equals(other.mName) &&
|
||||
mCount == other.mCount &&
|
||||
mDescription.equals(other.mDescription) &&
|
||||
mType == other.mType;
|
||||
}
|
||||
}
|
||||
|
|
@ -146,7 +146,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
* @return a VCF Header created from the list of strings
|
||||
*/
|
||||
protected VCFHeader createHeader(List<String> headerStrings) {
|
||||
Set<String> metaData = new TreeSet<String>();
|
||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||
Set<String> auxTags = new LinkedHashSet<String>();
|
||||
// iterate over all the passed in strings
|
||||
for ( String str : headerStrings ) {
|
||||
|
|
@ -169,7 +169,9 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
arrayIndex++;
|
||||
}
|
||||
} else {
|
||||
metaData.add(str.substring(2));
|
||||
int equals = str.indexOf("=");
|
||||
if ( equals != -1 )
|
||||
metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,10 +27,10 @@ public class VCFUtils {
|
|||
*
|
||||
* @return a set of all fields
|
||||
*/
|
||||
public static Set<String> getHeaderFields(GenomeAnalysisEngine toolkit) {
|
||||
public static Set<VCFHeaderLine> getHeaderFields(GenomeAnalysisEngine toolkit) {
|
||||
|
||||
// keep a map of sample name to occurrences encountered
|
||||
TreeSet<String> fields = new TreeSet<String>();
|
||||
TreeSet<VCFHeaderLine> fields = new TreeSet<VCFHeaderLine>();
|
||||
|
||||
// iterate to get all of the sample names
|
||||
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
|
||||
|
|
|
|||
|
|
@ -50,24 +50,22 @@ public class VCFWriter {
|
|||
new OutputStreamWriter(location));
|
||||
try {
|
||||
// the fileformat field needs to be written first
|
||||
TreeSet<String> allMetaData = new TreeSet<String>(header.getMetaData());
|
||||
for ( String metadata : allMetaData ) {
|
||||
if ( metadata.startsWith(VCFHeader.FILE_FORMAT_KEY) ) {
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + metadata + "\n");
|
||||
break;
|
||||
TreeSet<VCFHeaderLine> nonFormatMetaData = new TreeSet<VCFHeaderLine>();
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line.getKey().equals(VCFHeader.FILE_FORMAT_KEY) ) {
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + line.toString() + "\n");
|
||||
}
|
||||
else if ( metadata.startsWith(VCFHeader.OLD_FILE_FORMAT_KEY) ) {
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeader.FILE_FORMAT_KEY + metadata.substring(VCFHeader.OLD_FILE_FORMAT_KEY.length()) + "\n");
|
||||
break;
|
||||
else if ( line.getKey().equals(VCFHeader.OLD_FILE_FORMAT_KEY) ) {
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeader.FILE_FORMAT_KEY + line.toString().substring(VCFHeader.OLD_FILE_FORMAT_KEY.length()) + "\n");
|
||||
} else {
|
||||
nonFormatMetaData.add(line);
|
||||
}
|
||||
}
|
||||
|
||||
// write the rest of the header meta-data out
|
||||
for ( String metadata : header.getMetaData() ) {
|
||||
if ( !metadata.startsWith(VCFHeader.FILE_FORMAT_KEY) && !metadata.startsWith(VCFHeader.OLD_FILE_FORMAT_KEY) )
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + metadata + "\n");
|
||||
}
|
||||
|
||||
for ( VCFHeaderLine line : nonFormatMetaData )
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + line + "\n");
|
||||
|
||||
// write out the column line
|
||||
StringBuilder b = new StringBuilder();
|
||||
b.append(VCFHeader.HEADER_INDICATOR);
|
||||
|
|
|
|||
|
|
@ -17,8 +17,7 @@ import java.util.*;
|
|||
*/
|
||||
public class VCFHeaderTest extends BaseTest {
|
||||
|
||||
private Set<VCFHeader.HEADER_FIELDS> headerFields = new LinkedHashSet<VCFHeader.HEADER_FIELDS>();
|
||||
private Set<String> metaData = new HashSet();
|
||||
private Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||
private Set<String> additionalColumns = new HashSet<String>();
|
||||
|
||||
/**
|
||||
|
|
@ -26,8 +25,8 @@ public class VCFHeaderTest extends BaseTest {
|
|||
*/
|
||||
@Test
|
||||
public void testHeaderConstructor() {
|
||||
metaData.add(VCFHeader.FULL_FORMAT_LINE); // required
|
||||
metaData.add("two=2");
|
||||
metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
|
||||
metaData.add(new VCFHeaderLine("two", "2"));
|
||||
additionalColumns.add("extra1");
|
||||
additionalColumns.add("extra2");
|
||||
// this should create a header that is valid
|
||||
|
|
|
|||
|
|
@ -24,17 +24,16 @@ public class VCFReaderTest extends BaseTest {
|
|||
private static final File complexFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/complexExample.vcf");
|
||||
private static final File headerNoRecordsFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/headerNoRecords.vcf");
|
||||
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
}
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta"));
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCFInput() {
|
||||
|
|
@ -330,7 +329,6 @@ public class VCFReaderTest extends BaseTest {
|
|||
public void testHeaderNoRecords() {
|
||||
VCFReader reader = new VCFReader(headerNoRecordsFile);
|
||||
Assert.assertTrue(reader.getHeader().getMetaData() != null);
|
||||
Iterator<VCFRecord> iter = reader.iterator();
|
||||
Assert.assertTrue(!reader.iterator().hasNext());
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,21 +22,20 @@ import java.io.FileNotFoundException;
|
|||
*/
|
||||
public class VCFRecordTest extends BaseTest {
|
||||
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a fake VCF record
|
||||
*
|
||||
* @param infoFields the info fields
|
||||
* @return a VCFRecord
|
||||
*/
|
||||
private static VCFRecord makeFakeVCFRecord(Map<String, String> infoFields) {
|
||||
|
|
@ -140,9 +139,9 @@ public class VCFRecordTest extends BaseTest {
|
|||
* @return a fake VCF header
|
||||
*/
|
||||
public static VCFHeader createFakeHeader() {
|
||||
Set<String> metaData = new HashSet();
|
||||
metaData.add(VCFHeader.FULL_FORMAT_LINE); // required
|
||||
metaData.add("two=2");
|
||||
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||
metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
|
||||
metaData.add(new VCFHeaderLine("two", "2"));
|
||||
Set<String> additionalColumns = new HashSet<String>();
|
||||
additionalColumns.add("FORMAT");
|
||||
additionalColumns.add("sample1");
|
||||
|
|
@ -158,8 +157,6 @@ public class VCFRecordTest extends BaseTest {
|
|||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
infoFields.put("DP", "50");
|
||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||
Map<String, String> metaData = new HashMap<String, String>();
|
||||
List<String> additionalColumns = new ArrayList<String>();
|
||||
String rep = rec.toStringEncoding(createFakeHeader());
|
||||
Assert.assertTrue(stringRep.equals(rep));
|
||||
rec.addInfoField("AB", "CD");
|
||||
|
|
|
|||
|
|
@ -21,21 +21,18 @@ import java.util.*;
|
|||
* This class tests out the ability of the VCF writer to correctly write VCF files
|
||||
*/
|
||||
public class VCFWriterTest extends BaseTest {
|
||||
private Set<VCFHeader.HEADER_FIELDS> headerFields = new LinkedHashSet<VCFHeader.HEADER_FIELDS>();
|
||||
private Set<String> metaData = new HashSet();
|
||||
private Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||
private Set<String> additionalColumns = new HashSet<String>();
|
||||
private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf");
|
||||
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
}
|
||||
|
||||
/** test, using the writer and reader, that we can output and input a VCF file without problems */
|
||||
|
|
@ -50,21 +47,23 @@ public class VCFWriterTest extends BaseTest {
|
|||
int counter = 0;
|
||||
// validate what we're reading in
|
||||
validateHeader(reader.getHeader());
|
||||
for(VCFRecord rec :reader) {
|
||||
for (VCFRecord rec : reader) {
|
||||
counter++;
|
||||
}
|
||||
Assert.assertEquals(2,counter);
|
||||
Assert.assertEquals(2,counter);
|
||||
reader.close();
|
||||
fakeVCFFile.delete();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a fake header of known quantity
|
||||
* @param metaData the header lines
|
||||
* @param additionalColumns the additional column names
|
||||
* @return a fake VCF header
|
||||
*/
|
||||
public static VCFHeader createFakeHeader(Set<String> metaData, Set<String> additionalColumns) {
|
||||
metaData.add(VCFHeader.FULL_FORMAT_LINE); // required
|
||||
metaData.add("two=2");
|
||||
public static VCFHeader createFakeHeader(Set<VCFHeaderLine> metaData, Set<String> additionalColumns) {
|
||||
metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
|
||||
metaData.add(new VCFHeaderLine("two", "2"));
|
||||
additionalColumns.add("FORMAT");
|
||||
additionalColumns.add("extra1");
|
||||
additionalColumns.add("extra2");
|
||||
|
|
|
|||
Loading…
Reference in New Issue