diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 2696ed366..4c44cc758 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -14,7 +14,6 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.CalledGenotype; import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.*; @@ -250,170 +249,6 @@ public class VariantContextAdaptors { return new VCFHeader(hInfo == null ? new HashSet() : hInfo, names); } - public static VCFRecord toVCF(VariantContext vc, byte vcfRefBase) { - List allowedGenotypeAttributeKeys = null; - boolean filtersWereAppliedToContext = true; - boolean filtersWereAppliedToGenotypes = false; - - // deal with the reference - String referenceBases = new String(vc.getReference().getBases()); - - String contig = vc.getLocation().getContig(); - long position = vc.getLocation().getStart(); - String ID = vc.hasAttribute("ID") ? vc.getAttributeAsString("ID") : VCFConstants.EMPTY_ID_FIELD; - double qual = vc.hasNegLog10PError() ? vc.getPhredScaledQual() : -1; - - String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? VCFConstants.PASSES_FILTERS_v3 : VCFConstants.UNFILTERED); - - Map alleleMap = new HashMap(); - alleleMap.put(Allele.NO_CALL, new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE)); // convenience for lookup - List vcfAltAlleles = new ArrayList(); - for ( Allele a : vc.getAlleles() ) { - - VCFGenotypeEncoding encoding; - - // This is tricky because the VCF spec states that the reference must be a single - // character, whereas the reference alleles for deletions of length > 1 are strings. - // To be safe, we require the reference base be passed in and we use that whenever - // we know that the given allele is the reference. - - String alleleString = new String(a.getBases()); - if ( vc.getType() == VariantContext.Type.MIXED ) { - throw new UnsupportedOperationException("Conversion from a mixed type isn't currently supported"); - } else if ( vc.getType() == VariantContext.Type.INDEL ) { - if ( a.isNull() ) { - if ( a.isReference() ) { - // ref, where alt is insertion - encoding = new VCFGenotypeEncoding(Character.toString((char)vcfRefBase)); - } else { - // non-ref deletion - encoding = new VCFGenotypeEncoding("D" + Integer.toString(referenceBases.length())); - } - } else if ( a.isReference() ) { - // ref, where alt is deletion - encoding = new VCFGenotypeEncoding(Character.toString((char)vcfRefBase)); - } else { - // non-ref insertion - encoding = new VCFGenotypeEncoding("I" + alleleString); - } - } else if ( vc.getType() == VariantContext.Type.NO_VARIATION ) { - // ref - encoding = new VCFGenotypeEncoding(Character.toString((char)vcfRefBase)); - } else { - // ref or alt for snp - encoding = new VCFGenotypeEncoding(alleleString); - } - - if ( a.isNonReference() ) { - vcfAltAlleles.add(encoding); - } - - alleleMap.put(a, encoding); - } - - List vcfGenotypeAttributeKeys = new ArrayList(); - if ( vc.hasGenotypes() ) { - vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); - for ( String key : calcVCFGenotypeKeys(vc) ) { - if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) ) - vcfGenotypeAttributeKeys.add(key); - } - if ( filtersWereAppliedToGenotypes ) - vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_FILTER_KEY); - } - String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys); - - List genotypeObjects = new ArrayList(vc.getGenotypes().size()); - for ( Genotype g : vc.getGenotypesSortedByName() ) { - List encodings = new ArrayList(g.getPloidy()); - - for ( Allele a : g.getAlleles() ) { - encodings.add(alleleMap.get(a)); - } - - VCFGenotypeRecord.PHASE phasing = g.genotypesArePhased() ? VCFGenotypeRecord.PHASE.PHASED : VCFGenotypeRecord.PHASE.UNPHASED; - VCFGenotypeRecord vcfG = new VCFGenotypeRecord(g.getSampleName(), encodings, phasing); - - for ( String key : vcfGenotypeAttributeKeys ) { - if ( key.equals(VCFConstants.GENOTYPE_KEY) ) - continue; - - Object val = g.getAttribute(key); - // some exceptions - if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) { - if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 ) - val = VCFConstants.MISSING_GENOTYPE_QUALITY_v3; - else - val = Math.min(g.getPhredScaledQual(), VCFGenotypeRecord.MAX_QUAL_VALUE); - } else if ( key.equals(VCFConstants.DEPTH_KEY) && val == null ) { - ReadBackedPileup pileup = (ReadBackedPileup)g.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY); - if ( pileup != null ) - val = pileup.size(); - } else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) { - val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : VCFConstants.PASSES_FILTERS_v3; - } - - String outputValue = formatVCFField(key, val); - if ( outputValue != null ) - vcfG.setField(key, outputValue); - } - - genotypeObjects.add(vcfG); - } - // info fields - Map infoFields = new HashMap(); - for ( Map.Entry elt : vc.getAttributes().entrySet() ) { - String key = elt.getKey(); - if ( key.equals("ID") ) - continue; - - String outputValue = formatVCFField(key, elt.getValue()); - if ( outputValue != null ) - infoFields.put(key, outputValue); - } - - return new VCFRecord(Character.toString((char)vcfRefBase), contig, position, ID, vcfAltAlleles, qual, filters, infoFields, genotypeFormatString, genotypeObjects); - } - - private static String formatVCFField(String key, Object val) { - String result; - if ( val == null ) - result = VCFGenotypeRecord.getMissingFieldValue(key); - else if ( val instanceof Double ) - result = String.format("%.2f", (Double)val); - else if ( val instanceof Boolean ) - result = (Boolean)val ? "" : null; // empty string for true, null for false - else if ( val instanceof List ) { - List list = (List)val; - if ( list.size() == 0 ) - return formatVCFField(key, null); - StringBuffer sb = new StringBuffer(formatVCFField(key, list.get(0))); - for ( int i = 1; i < list.size(); i++) { - sb.append(","); - sb.append(formatVCFField(key, list.get(i))); - } - result = sb.toString(); - } else - result = val.toString(); - - return result; - } - - private static List calcVCFGenotypeKeys(VariantContext vc) { - Set keys = new HashSet(); - - boolean sawGoodQual = false; - for ( Genotype g : vc.getGenotypes().values() ) { - keys.addAll(g.getAttributes().keySet()); - if ( g.hasNegLog10PError() ) - sawGoodQual = true; - } - - if ( sawGoodQual ) - keys.add(VCFConstants.GENOTYPE_QUALITY_KEY); - return Utils.sorted(new ArrayList(keys)); - } - // -------------------------------------------------------------------------------------------------------------- // // Plink to VariantContext diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index b800983ed..2c1c640fb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -547,7 +547,7 @@ public class VariantEvalWalker extends RodWalker { wroteHeader = true; } - writer.addRecord(VariantContextAdaptors.toVCF(mvc, ref)); + writer.add(mvc, new byte[]{ref}); //interestingReasons.clear(); } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java index 6b01b45e5..7eda06a26 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java @@ -5,6 +5,8 @@ import org.broad.tribble.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; @@ -95,22 +97,19 @@ public class IndelAnnotator extends RodWalker{ if ( tracker == null ) return 0; - List rods = tracker.getReferenceMetaData("variant"); + VariantContext vc = tracker.getVariantContext(ref, "variant", null, con.getLocation(), true); // ignore places where we don't have a variant - if ( rods.size() == 0 ) + if ( vc == null ) return 0; - Object variant = rods.get(0); + RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(ref.getLocus())); + String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList)); + annotationString = annotationString.split("\\s+")[0]; - if ( variant instanceof VCFRecord) { - RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(ref.getLocus())); - String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList)); - annotationString = annotationString.split("\\s+")[0]; - ((VCFRecord) variant).addInfoField("type",annotationString); - vcfWriter.addRecord((VCFRecord) variant); - } else { - throw new StingException("This one-off walker only deals with VCF files."); - } + Map attrs = new HashMap(vc.getAttributes()); + attrs.put("type",annotationString); + vc = VariantContextUtils.modifyAttributes(vc, attrs); + vcfWriter.add(vc, new byte[]{ref.getBase()}); return 1; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java index 28a733a67..4a9b2d5b6 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java @@ -142,12 +142,12 @@ public class IndelDBRateWalker extends RodWalker if ( vcfWriter != null ) { int i = 0; while ( i < compContexts.size() && compContexts.get(i).getLocation().isBefore(evalContexts.get(0).getLocation())) { - vcfWriter.addRecord(VariantContextAdaptors.toVCF(compContexts.get(i),compContexts.get(i).getReference().getBases()[0])); + vcfWriter.add(compContexts.get(i), new byte[]{compContexts.get(i).getReference().getBases()[0]}); i++; } - vcfWriter.addRecord(VariantContextAdaptors.toVCF(evalContexts.get(0),ref.getBase())); + vcfWriter.add(evalContexts.get(0), new byte[]{ref.getBase()}); while ( i < compContexts.size() && compContexts.get(i).getLocation().distance(evalContexts.get(0).getLocation()) <= indelWindow) { - vcfWriter.addRecord(VariantContextAdaptors.toVCF(compContexts.get(i),compContexts.get(i).getReference().getBases()[0])); + vcfWriter.add(compContexts.get(i), new byte[]{compContexts.get(i).getReference().getBases()[0]}); i++; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java index e5b18455b..67eff08a3 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java @@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; @@ -602,7 +601,7 @@ public class MendelianViolationClassifier extends LocusWalker { - - private VCFWriter vcfWriter; - - public void initialize() { - TreeSet samples = new TreeSet(); - SampleUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, new HashMap, String>()); - Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - hInfo.add(new VCFHeaderLine("source", "VariantAnnotator")); - vcfWriter = new VCFWriter(out); - VCFHeader vcfHeader = new VCFHeader(hInfo, samples); - vcfWriter.writeHeader(vcfHeader); - } - - public VCFRecord map(RefMetaDataTracker tracker, ReferenceContext context, AlignmentContext alicon) { - if ( tracker == null ) { - return null; - } - List rods = tracker.getReferenceMetaData("fixme"); - Object rod = rods.get(0); - VCFRecord vcfrod = null; - if ( rod instanceof VCFRecord ) { - vcfrod = (VCFRecord) rod; - } - - if (vcfrod != null) vcfrod.setReferenceBase(new String(context.getBases())); - return vcfrod; - - /* - VariantContext vcon = null; - if ( rod instanceof RodVCF) { - vcon = VariantContextAdaptors.toVariantContext("fixme", (RodVCF) rod, new Allele(BaseUtils.charSeq2byteSeq(context.getBases()),true)); - } - if ( vcon == null ) { - return null; - } - Set otherAlleles = vcon.getAlternateAlleles(); - VariantContext fixedContext = new VariantContext(vcon.getName(),context.getLocus(),otherAlleles,vcon.getGenotypes(),vcon.getNegLog10PError(),vcon.getFilters(),vcon.getAttributes()); - return VariantContextAdaptors.toVCF(fixedContext,context.getBase());*/ - } - - public Long reduce(VCFRecord con, Long num) { - if ( con == null ) { - return num; - } - - vcfWriter.addRecord(con); - return 1 + num; - } - - public Long reduceInit() { - return 0l; - } - - public void onTraversalDone(Long num){ - return; - } -} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java index 4d784dfbe..6a0759ac8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java @@ -179,7 +179,7 @@ public class TrioGenotyperWalker extends RefWalker{ if ( a == 0 ) writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc)); - writer.addRecord(VariantContextAdaptors.toVCF(vc, (byte)'.')); + writer.add(vc, new byte[]{(byte)'.'}); a++; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java index 044ebabfa..2d4e86b58 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java @@ -118,7 +118,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker { for ( VariantContext vc : VCs ) { if( !vc.getName().toUpperCase().startsWith("TRUTH") ) { if( vc.isVariant() ) { - dataManager.addAnnotations( vc, ref.getBase(), SAMPLE_NAME, isInTruthSet, isTrueVariant ); + dataManager.addAnnotations( vc, SAMPLE_NAME, isInTruthSet, isTrueVariant ); } } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java index a343610f5..b7d8c214c 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer; -import org.broad.tribble.vcf.VCFRecord; -import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; @@ -54,7 +52,7 @@ public class AnnotationDataManager { INDICATE_MEAN_NUM_VARS = _INDICATE_MEAN_NUM_VARS; } - public void addAnnotations( final VariantContext vc, final byte ref, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) { + public void addAnnotations( final VariantContext vc, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) { if( sampleName != null ) { // Only process variants that are found in the sample with this sampleName if( vc.getGenotype(sampleName).isNoCall() ) { // This variant isn't found in this sample so break out @@ -62,24 +60,22 @@ public class AnnotationDataManager { } } // else, process all samples - VCFRecord variant = VariantContextAdaptors.toVCF(vc, ref); - // Loop over each annotation in the vcf record - final Map infoField = variant.getInfoValues(); - infoField.put("QUAL", ((Double)variant.getQual()).toString() ); // add QUAL field to annotations - for( final String annotationKey : infoField.keySet() ) { + final Map infoField = vc.getAttributes(); + infoField.put("QUAL", ((Double)vc.getPhredScaledQual()).toString() ); // add QUAL field to annotations + for( Map.Entry annotation : infoField.entrySet() ) { float value; try { - value = Float.parseFloat( infoField.get( annotationKey ) ); + value = Float.parseFloat( annotation.getValue().toString() ); } catch( NumberFormatException e ) { continue; // Skip over annotations that aren't floats, like "DB" } - TreeSet treeSet = data.get( annotationKey ); + TreeSet treeSet = data.get( annotation.getKey() ); if( treeSet == null ) { // This annotation hasn't been seen before treeSet = new TreeSet( new AnnotationDatum() ); // AnnotationDatum is a Comparator that orders variants by the value of the Annotation - data.put( annotationKey, treeSet ); + data.put( annotation.getKey(), treeSet ); } AnnotationDatum datum = new AnnotationDatum( value ); if( treeSet.contains(datum) ) { // contains() uses AnnotationDatum's equals function, so it only checks if the value field is already present @@ -88,7 +84,7 @@ public class AnnotationDataManager { treeSet.add(datum); } - final boolean isNovelVariant = variant.getID().equals("."); + final boolean isNovelVariant = infoField.containsKey("ID"); // Decide if the variant is a transition or transversion if ( vc.isSNP() ) { diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 902a7d86e..3d7a34b56 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -118,7 +118,7 @@ public class for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("dc53aaf7db9f05e3b0a38bf5efe3fbbe", "cd8616aca14eb77bd90732fbfce038d5")); + Arrays.asList("dc53aaf7db9f05e3b0a38bf5efe3fbbe", "d94328f4a5f7c40e95edf2ef13f38ae0")); executeTest("testVEWriteVCF", spec); } }