No more manually converting VariantContexts to VCFRecords. You should be utilizing VCs and not VCFRecords.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3787 348d0f76-0448-11de-a6fe-93d51630548a
2010-07-14 05:21:28 +00:00 · 2010-07-14 05:21:28 +00:00 · 460283f6d2
parent 6b5c88d4d6
commit 460283f6d2
10 changed files with 27 additions and 279 deletions
--- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java
@ -14,7 +14,6 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
 import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.genotype.CalledGenotype;
 import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
-import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

 import java.util.*;

@ -250,170 +249,6 @@ public class VariantContextAdaptors {
        return new VCFHeader(hInfo == null ? new HashSet<VCFHeaderLine>() : hInfo, names);
    }

-    public static VCFRecord toVCF(VariantContext vc, byte vcfRefBase) {
-        List<String> allowedGenotypeAttributeKeys = null;
-        boolean filtersWereAppliedToContext = true;
-        boolean filtersWereAppliedToGenotypes = false;
-
-        // deal with the reference
-        String referenceBases = new String(vc.getReference().getBases());
-
-        String contig = vc.getLocation().getContig();
-        long position = vc.getLocation().getStart();
-        String ID = vc.hasAttribute("ID") ? vc.getAttributeAsString("ID") : VCFConstants.EMPTY_ID_FIELD;
-        double qual = vc.hasNegLog10PError() ? vc.getPhredScaledQual() : -1;
-        
-        String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? VCFConstants.PASSES_FILTERS_v3 : VCFConstants.UNFILTERED);
-
-        Map<Allele, VCFGenotypeEncoding> alleleMap = new HashMap<Allele, VCFGenotypeEncoding>();
-        alleleMap.put(Allele.NO_CALL, new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE)); // convenience for lookup
-        List<VCFGenotypeEncoding> vcfAltAlleles = new ArrayList<VCFGenotypeEncoding>();
-        for ( Allele a : vc.getAlleles() ) {
-
-            VCFGenotypeEncoding encoding;
-
-            // This is tricky because the VCF spec states that the reference must be a single
-            // character, whereas the reference alleles for deletions of length > 1 are strings.
-            // To be safe, we require the reference base be passed in and we use that whenever
-            // we know that the given allele is the reference.
-
-            String alleleString = new String(a.getBases());
-            if ( vc.getType() == VariantContext.Type.MIXED ) {
-                throw new UnsupportedOperationException("Conversion from a mixed type isn't currently supported");
-            } else if ( vc.getType() == VariantContext.Type.INDEL ) {
-                if ( a.isNull() ) {
-                    if ( a.isReference() ) {
-                        // ref, where alt is insertion
-                        encoding = new VCFGenotypeEncoding(Character.toString((char)vcfRefBase));
-                    } else {
-                        // non-ref deletion
-                        encoding = new VCFGenotypeEncoding("D" + Integer.toString(referenceBases.length()));
-                    }
-                } else if ( a.isReference() ) {
-                    // ref, where alt is deletion
-                    encoding = new VCFGenotypeEncoding(Character.toString((char)vcfRefBase));
-                } else {
-                    // non-ref insertion
-                    encoding = new VCFGenotypeEncoding("I" + alleleString);
-                }
-            } else if ( vc.getType() == VariantContext.Type.NO_VARIATION ) {
-                // ref
-                encoding = new VCFGenotypeEncoding(Character.toString((char)vcfRefBase));
-            } else {
-                // ref or alt for snp
-                encoding = new VCFGenotypeEncoding(alleleString);
-            }
-
-            if ( a.isNonReference() ) {
-                vcfAltAlleles.add(encoding);
-            }
-
-            alleleMap.put(a, encoding);
-        }
-
-        List<String> vcfGenotypeAttributeKeys = new ArrayList<String>();
-        if ( vc.hasGenotypes() ) {
-            vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
-            for ( String key : calcVCFGenotypeKeys(vc) ) {
-                if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) )
-                    vcfGenotypeAttributeKeys.add(key);
-            }
-            if ( filtersWereAppliedToGenotypes )
-                vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_FILTER_KEY);
-        }
-        String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys);
-
-        List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>(vc.getGenotypes().size());
-        for ( Genotype g : vc.getGenotypesSortedByName() ) {
-            List<VCFGenotypeEncoding> encodings = new ArrayList<VCFGenotypeEncoding>(g.getPloidy());
-
-            for ( Allele a : g.getAlleles() ) {
-                encodings.add(alleleMap.get(a));
-            }
-
-            VCFGenotypeRecord.PHASE phasing = g.genotypesArePhased() ? VCFGenotypeRecord.PHASE.PHASED : VCFGenotypeRecord.PHASE.UNPHASED;
-            VCFGenotypeRecord vcfG = new VCFGenotypeRecord(g.getSampleName(), encodings, phasing);
-
-            for ( String key : vcfGenotypeAttributeKeys ) {
-                if ( key.equals(VCFConstants.GENOTYPE_KEY) )
-                    continue;
-                
-                Object val = g.getAttribute(key);
-                // some exceptions
-                if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
-                    if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 )
-                        val = VCFConstants.MISSING_GENOTYPE_QUALITY_v3;
-                    else
-                        val = Math.min(g.getPhredScaledQual(), VCFGenotypeRecord.MAX_QUAL_VALUE);
-                } else if ( key.equals(VCFConstants.DEPTH_KEY) && val == null ) {
-                    ReadBackedPileup pileup = (ReadBackedPileup)g.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY);
-                    if ( pileup != null )
-                        val = pileup.size();
-                } else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
-                    val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : VCFConstants.PASSES_FILTERS_v3;
-                }
-
-                String outputValue = formatVCFField(key, val);
-                if ( outputValue != null )
-                    vcfG.setField(key, outputValue);
-            }
-
-            genotypeObjects.add(vcfG);
-        }
-        // info fields
-        Map<String, String> infoFields = new HashMap<String, String>();
-        for ( Map.Entry<String, Object> elt : vc.getAttributes().entrySet() ) {
-            String key = elt.getKey();
-            if ( key.equals("ID") )
-                continue;
-
-            String outputValue = formatVCFField(key, elt.getValue());
-            if ( outputValue != null )
-                infoFields.put(key, outputValue);
-        }
-
-        return new VCFRecord(Character.toString((char)vcfRefBase), contig, position, ID, vcfAltAlleles, qual, filters, infoFields, genotypeFormatString, genotypeObjects);
-    }
-
-    private static String formatVCFField(String key, Object val) {
-        String result;
-        if ( val == null )
-            result = VCFGenotypeRecord.getMissingFieldValue(key);
-        else if ( val instanceof Double )
-            result = String.format("%.2f", (Double)val);
-        else if ( val instanceof Boolean )
-            result = (Boolean)val ? "" : null; // empty string for true, null for false
-        else if ( val instanceof List ) {
-            List list = (List)val;
-            if ( list.size() == 0 )
-                return formatVCFField(key, null);
-            StringBuffer sb = new StringBuffer(formatVCFField(key, list.get(0)));
-            for ( int i = 1; i < list.size(); i++) {
-                sb.append(",");
-                sb.append(formatVCFField(key, list.get(i)));
-            }
-            result = sb.toString();
-        } else
-            result = val.toString();
-
-        return result;
-    }
-
-    private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
-        Set<String> keys = new HashSet<String>();
-
-        boolean sawGoodQual = false;
-        for ( Genotype g : vc.getGenotypes().values() ) {
-            keys.addAll(g.getAttributes().keySet());
-            if ( g.hasNegLog10PError() )
-                sawGoodQual = true;
-        }
-
-        if ( sawGoodQual )
-            keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
-        return Utils.sorted(new ArrayList<String>(keys));
-    }
-
    // --------------------------------------------------------------------------------------------------------------
    //
    // Plink to VariantContext
--- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
@ -547,7 +547,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
                wroteHeader = true;
            }

-            writer.addRecord(VariantContextAdaptors.toVCF(mvc, ref));
+            writer.add(mvc, new byte[]{ref});
            //interestingReasons.clear();
        }
    }
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java
@ -5,6 +5,8 @@ import org.broad.tribble.vcf.*;
 import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
 import org.broadinstitute.sting.gatk.refdata.*;
 import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
 import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
@ -95,22 +97,19 @@ public class IndelAnnotator extends RodWalker<Integer,Long>{
        if ( tracker == null )
            return 0;

-        List<Object> rods = tracker.getReferenceMetaData("variant");
+        VariantContext vc = tracker.getVariantContext(ref, "variant", null, con.getLocation(), true);
        // ignore places where we don't have a variant
-        if ( rods.size() == 0 )
+        if ( vc == null )
            return 0;

-        Object variant = rods.get(0);
+        RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(ref.getLocus()));
+        String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
+        annotationString = annotationString.split("\\s+")[0];

-        if ( variant instanceof VCFRecord) {
-            RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(ref.getLocus()));
-            String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
-            annotationString = annotationString.split("\\s+")[0];
-            ((VCFRecord) variant).addInfoField("type",annotationString);
-            vcfWriter.addRecord((VCFRecord) variant);
-        } else {
-            throw new StingException("This one-off walker only deals with VCF files.");
-        }
+        Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
+        attrs.put("type",annotationString);
+        vc = VariantContextUtils.modifyAttributes(vc, attrs);
+        vcfWriter.add(vc, new byte[]{ref.getBase()});

        return 1;
    }
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java
@ -142,12 +142,12 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
        if ( vcfWriter != null ) {
            int i = 0;
            while ( i < compContexts.size() && compContexts.get(i).getLocation().isBefore(evalContexts.get(0).getLocation())) {
-                vcfWriter.addRecord(VariantContextAdaptors.toVCF(compContexts.get(i),compContexts.get(i).getReference().getBases()[0]));
+                vcfWriter.add(compContexts.get(i), new byte[]{compContexts.get(i).getReference().getBases()[0]});
                i++;
            }
-            vcfWriter.addRecord(VariantContextAdaptors.toVCF(evalContexts.get(0),ref.getBase()));
+            vcfWriter.add(evalContexts.get(0), new byte[]{ref.getBase()});
            while ( i < compContexts.size() && compContexts.get(i).getLocation().distance(evalContexts.get(0).getLocation()) <= indelWindow) {
-                vcfWriter.addRecord(VariantContextAdaptors.toVCF(compContexts.get(i),compContexts.get(i).getReference().getBases()[0]));
+                vcfWriter.add(compContexts.get(i), new byte[]{compContexts.get(i).getReference().getBases()[0]});
                i++;
            }
        }
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java
@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
 import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
 import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
 import org.broadinstitute.sting.gatk.walkers.LocusWalker;
 import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
 import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
@ -602,7 +601,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
    public VCFWriter reduce(MendelianViolation variant, VCFWriter writer) {
        if ( variant != null ) {
            trioStructure.updateHomozygosityRegions(variant,bedOutput);
-            writer.addRecord(VariantContextAdaptors.toVCF(variant.toVariantContext(),variant.getRefBase()));
+            writer.add(variant.toVariantContext(), new byte[]{variant.getRefBase()});
        }

        return writer;
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCFReferenceFixerWalker.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCFReferenceFixerWalker.java
@ -1,81 +0,0 @@
-package org.broadinstitute.sting.oneoffprojects.walkers;
-
-import org.broad.tribble.vcf.VCFHeader;
-import org.broad.tribble.vcf.VCFHeaderLine;
-import org.broad.tribble.vcf.VCFRecord;
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.RodWalker;
-import org.broadinstitute.sting.utils.BaseUtils;
-import org.broadinstitute.sting.utils.collections.Pair;
-import org.broadinstitute.sting.utils.SampleUtils;
-import org.broadinstitute.sting.utils.genotype.vcf.*;
-
-import java.util.*;
-
-/**
- * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
- *
- * @Author chartl
- * @Date Apr 13, 2010
- */
-public class VCFReferenceFixerWalker extends RodWalker<VCFRecord,Long> {
-
-    private VCFWriter vcfWriter;
-
-    public void initialize() {
-        TreeSet<String> samples = new TreeSet<String>();
-        SampleUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, new HashMap<Pair<String, String>, String>());
-        Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
-        hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
-        hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
-        vcfWriter = new VCFWriter(out);
-        VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
-        vcfWriter.writeHeader(vcfHeader);
-    }
-
-    public VCFRecord map(RefMetaDataTracker tracker, ReferenceContext context, AlignmentContext alicon) {
-        if ( tracker == null ) {
-            return null;
-        }
-        List<Object> rods = tracker.getReferenceMetaData("fixme");
-        Object rod = rods.get(0);
-        VCFRecord vcfrod = null;
-        if ( rod instanceof VCFRecord ) {
-            vcfrod = (VCFRecord) rod;
-        }
-
-        if (vcfrod != null) vcfrod.setReferenceBase(new String(context.getBases()));
-        return vcfrod;
-
-        /*
-        VariantContext vcon = null;
-        if ( rod instanceof RodVCF) {
-            vcon = VariantContextAdaptors.toVariantContext("fixme", (RodVCF) rod, new Allele(BaseUtils.charSeq2byteSeq(context.getBases()),true));
-        }
-        if ( vcon == null ) {
-            return null;
-        }
-        Set<Allele> otherAlleles = vcon.getAlternateAlleles();
-        VariantContext fixedContext = new VariantContext(vcon.getName(),context.getLocus(),otherAlleles,vcon.getGenotypes(),vcon.getNegLog10PError(),vcon.getFilters(),vcon.getAttributes());
-        return VariantContextAdaptors.toVCF(fixedContext,context.getBase());*/
-    }
-
-    public Long reduce(VCFRecord con, Long num) {
-        if ( con == null ) {
-            return num;
-        }
-
-        vcfWriter.addRecord(con);
-        return 1 + num;
-    }
-
-    public Long reduceInit() {
-        return 0l;
-    }
-
-    public void onTraversalDone(Long num){
-        return;
-    }
-}
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java
@ -179,7 +179,7 @@ public class TrioGenotyperWalker extends RefWalker<VariantContext, Integer>{
            if ( a == 0 )
                writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc));

-            writer.addRecord(VariantContextAdaptors.toVCF(vc, (byte)'.'));
+            writer.add(vc, new byte[]{(byte)'.'});
            a++;
        }

--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java
@ -118,7 +118,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
            for ( VariantContext vc : VCs ) {
                if( !vc.getName().toUpperCase().startsWith("TRUTH") ) {
                    if( vc.isVariant() ) {
-                        dataManager.addAnnotations( vc, ref.getBase(), SAMPLE_NAME, isInTruthSet, isTrueVariant );
+                        dataManager.addAnnotations( vc, SAMPLE_NAME, isInTruthSet, isTrueVariant );
                    }
                }
            }
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java
@ -1,7 +1,5 @@
 package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;

-import org.broad.tribble.vcf.VCFRecord;
-import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
 import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.StingException;
@ -54,7 +52,7 @@ public class AnnotationDataManager {
        INDICATE_MEAN_NUM_VARS = _INDICATE_MEAN_NUM_VARS;
    }

-    public void addAnnotations( final VariantContext vc, final byte ref, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) {
+    public void addAnnotations( final VariantContext vc, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) {

        if( sampleName != null ) { // Only process variants that are found in the sample with this sampleName
            if( vc.getGenotype(sampleName).isNoCall() ) { // This variant isn't found in this sample so break out
@ -62,24 +60,22 @@ public class AnnotationDataManager {
            }
        } // else, process all samples

-        VCFRecord variant = VariantContextAdaptors.toVCF(vc, ref);
-
        // Loop over each annotation in the vcf record
-        final Map<String,String> infoField = variant.getInfoValues();
-        infoField.put("QUAL", ((Double)variant.getQual()).toString() ); // add QUAL field to annotations
-        for( final String annotationKey : infoField.keySet() ) {
+        final Map<String,Object> infoField = vc.getAttributes();
+        infoField.put("QUAL", ((Double)vc.getPhredScaledQual()).toString() ); // add QUAL field to annotations
+        for( Map.Entry<String, Object> annotation : infoField.entrySet() ) {

            float value;
            try {
-                value = Float.parseFloat( infoField.get( annotationKey ) );
+                value = Float.parseFloat( annotation.getValue().toString() );
            } catch( NumberFormatException e ) {
                continue; // Skip over annotations that aren't floats, like "DB"
            }

-            TreeSet<AnnotationDatum> treeSet = data.get( annotationKey );
+            TreeSet<AnnotationDatum> treeSet = data.get( annotation.getKey() );
            if( treeSet == null ) { // This annotation hasn't been seen before
                treeSet = new TreeSet<AnnotationDatum>( new AnnotationDatum() ); // AnnotationDatum is a Comparator that orders variants by the value of the Annotation
-                data.put( annotationKey, treeSet );
+                data.put( annotation.getKey(), treeSet );
            }
            AnnotationDatum datum = new AnnotationDatum( value );
            if( treeSet.contains(datum) ) { // contains() uses AnnotationDatum's equals function, so it only checks if the value field is already present
@ -88,7 +84,7 @@ public class AnnotationDataManager {
                treeSet.add(datum);
            }

-            final boolean isNovelVariant = variant.getID().equals(".");
+            final boolean isNovelVariant = infoField.containsKey("ID");

            // Decide if the variant is a transition or transversion
            if ( vc.isSNP() ) {
--- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@ -118,7 +118,7 @@ public class
        for (String tests : testsEnumerations) {
            WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s",
                    2,
-                    Arrays.asList("dc53aaf7db9f05e3b0a38bf5efe3fbbe", "cd8616aca14eb77bd90732fbfce038d5"));
+                    Arrays.asList("dc53aaf7db9f05e3b0a38bf5efe3fbbe", "d94328f4a5f7c40e95edf2ef13f38ae0"));
            executeTest("testVEWriteVCF", spec);
        }
    }