No more manually converting VariantContexts to VCFRecords. You should be utilizing VCs and not VCFRecords.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3787 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-07-14 05:21:28 +00:00
parent 6b5c88d4d6
commit 460283f6d2
10 changed files with 27 additions and 279 deletions

View File

@ -14,7 +14,6 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.*;
@ -250,170 +249,6 @@ public class VariantContextAdaptors {
return new VCFHeader(hInfo == null ? new HashSet<VCFHeaderLine>() : hInfo, names);
}
public static VCFRecord toVCF(VariantContext vc, byte vcfRefBase) {
List<String> allowedGenotypeAttributeKeys = null;
boolean filtersWereAppliedToContext = true;
boolean filtersWereAppliedToGenotypes = false;
// deal with the reference
String referenceBases = new String(vc.getReference().getBases());
String contig = vc.getLocation().getContig();
long position = vc.getLocation().getStart();
String ID = vc.hasAttribute("ID") ? vc.getAttributeAsString("ID") : VCFConstants.EMPTY_ID_FIELD;
double qual = vc.hasNegLog10PError() ? vc.getPhredScaledQual() : -1;
String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? VCFConstants.PASSES_FILTERS_v3 : VCFConstants.UNFILTERED);
Map<Allele, VCFGenotypeEncoding> alleleMap = new HashMap<Allele, VCFGenotypeEncoding>();
alleleMap.put(Allele.NO_CALL, new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE)); // convenience for lookup
List<VCFGenotypeEncoding> vcfAltAlleles = new ArrayList<VCFGenotypeEncoding>();
for ( Allele a : vc.getAlleles() ) {
VCFGenotypeEncoding encoding;
// This is tricky because the VCF spec states that the reference must be a single
// character, whereas the reference alleles for deletions of length > 1 are strings.
// To be safe, we require the reference base be passed in and we use that whenever
// we know that the given allele is the reference.
String alleleString = new String(a.getBases());
if ( vc.getType() == VariantContext.Type.MIXED ) {
throw new UnsupportedOperationException("Conversion from a mixed type isn't currently supported");
} else if ( vc.getType() == VariantContext.Type.INDEL ) {
if ( a.isNull() ) {
if ( a.isReference() ) {
// ref, where alt is insertion
encoding = new VCFGenotypeEncoding(Character.toString((char)vcfRefBase));
} else {
// non-ref deletion
encoding = new VCFGenotypeEncoding("D" + Integer.toString(referenceBases.length()));
}
} else if ( a.isReference() ) {
// ref, where alt is deletion
encoding = new VCFGenotypeEncoding(Character.toString((char)vcfRefBase));
} else {
// non-ref insertion
encoding = new VCFGenotypeEncoding("I" + alleleString);
}
} else if ( vc.getType() == VariantContext.Type.NO_VARIATION ) {
// ref
encoding = new VCFGenotypeEncoding(Character.toString((char)vcfRefBase));
} else {
// ref or alt for snp
encoding = new VCFGenotypeEncoding(alleleString);
}
if ( a.isNonReference() ) {
vcfAltAlleles.add(encoding);
}
alleleMap.put(a, encoding);
}
List<String> vcfGenotypeAttributeKeys = new ArrayList<String>();
if ( vc.hasGenotypes() ) {
vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
for ( String key : calcVCFGenotypeKeys(vc) ) {
if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) )
vcfGenotypeAttributeKeys.add(key);
}
if ( filtersWereAppliedToGenotypes )
vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_FILTER_KEY);
}
String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys);
List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>(vc.getGenotypes().size());
for ( Genotype g : vc.getGenotypesSortedByName() ) {
List<VCFGenotypeEncoding> encodings = new ArrayList<VCFGenotypeEncoding>(g.getPloidy());
for ( Allele a : g.getAlleles() ) {
encodings.add(alleleMap.get(a));
}
VCFGenotypeRecord.PHASE phasing = g.genotypesArePhased() ? VCFGenotypeRecord.PHASE.PHASED : VCFGenotypeRecord.PHASE.UNPHASED;
VCFGenotypeRecord vcfG = new VCFGenotypeRecord(g.getSampleName(), encodings, phasing);
for ( String key : vcfGenotypeAttributeKeys ) {
if ( key.equals(VCFConstants.GENOTYPE_KEY) )
continue;
Object val = g.getAttribute(key);
// some exceptions
if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 )
val = VCFConstants.MISSING_GENOTYPE_QUALITY_v3;
else
val = Math.min(g.getPhredScaledQual(), VCFGenotypeRecord.MAX_QUAL_VALUE);
} else if ( key.equals(VCFConstants.DEPTH_KEY) && val == null ) {
ReadBackedPileup pileup = (ReadBackedPileup)g.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY);
if ( pileup != null )
val = pileup.size();
} else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : VCFConstants.PASSES_FILTERS_v3;
}
String outputValue = formatVCFField(key, val);
if ( outputValue != null )
vcfG.setField(key, outputValue);
}
genotypeObjects.add(vcfG);
}
// info fields
Map<String, String> infoFields = new HashMap<String, String>();
for ( Map.Entry<String, Object> elt : vc.getAttributes().entrySet() ) {
String key = elt.getKey();
if ( key.equals("ID") )
continue;
String outputValue = formatVCFField(key, elt.getValue());
if ( outputValue != null )
infoFields.put(key, outputValue);
}
return new VCFRecord(Character.toString((char)vcfRefBase), contig, position, ID, vcfAltAlleles, qual, filters, infoFields, genotypeFormatString, genotypeObjects);
}
private static String formatVCFField(String key, Object val) {
String result;
if ( val == null )
result = VCFGenotypeRecord.getMissingFieldValue(key);
else if ( val instanceof Double )
result = String.format("%.2f", (Double)val);
else if ( val instanceof Boolean )
result = (Boolean)val ? "" : null; // empty string for true, null for false
else if ( val instanceof List ) {
List list = (List)val;
if ( list.size() == 0 )
return formatVCFField(key, null);
StringBuffer sb = new StringBuffer(formatVCFField(key, list.get(0)));
for ( int i = 1; i < list.size(); i++) {
sb.append(",");
sb.append(formatVCFField(key, list.get(i)));
}
result = sb.toString();
} else
result = val.toString();
return result;
}
private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
Set<String> keys = new HashSet<String>();
boolean sawGoodQual = false;
for ( Genotype g : vc.getGenotypes().values() ) {
keys.addAll(g.getAttributes().keySet());
if ( g.hasNegLog10PError() )
sawGoodQual = true;
}
if ( sawGoodQual )
keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
return Utils.sorted(new ArrayList<String>(keys));
}
// --------------------------------------------------------------------------------------------------------------
//
// Plink to VariantContext

View File

@ -547,7 +547,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
wroteHeader = true;
}
writer.addRecord(VariantContextAdaptors.toVCF(mvc, ref));
writer.add(mvc, new byte[]{ref});
//interestingReasons.clear();
}
}

View File

@ -5,6 +5,8 @@ import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
@ -95,22 +97,19 @@ public class IndelAnnotator extends RodWalker<Integer,Long>{
if ( tracker == null )
return 0;
List<Object> rods = tracker.getReferenceMetaData("variant");
VariantContext vc = tracker.getVariantContext(ref, "variant", null, con.getLocation(), true);
// ignore places where we don't have a variant
if ( rods.size() == 0 )
if ( vc == null )
return 0;
Object variant = rods.get(0);
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(ref.getLocus()));
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
annotationString = annotationString.split("\\s+")[0];
if ( variant instanceof VCFRecord) {
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(ref.getLocus()));
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
annotationString = annotationString.split("\\s+")[0];
((VCFRecord) variant).addInfoField("type",annotationString);
vcfWriter.addRecord((VCFRecord) variant);
} else {
throw new StingException("This one-off walker only deals with VCF files.");
}
Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
attrs.put("type",annotationString);
vc = VariantContextUtils.modifyAttributes(vc, attrs);
vcfWriter.add(vc, new byte[]{ref.getBase()});
return 1;
}

View File

@ -142,12 +142,12 @@ public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator>
if ( vcfWriter != null ) {
int i = 0;
while ( i < compContexts.size() && compContexts.get(i).getLocation().isBefore(evalContexts.get(0).getLocation())) {
vcfWriter.addRecord(VariantContextAdaptors.toVCF(compContexts.get(i),compContexts.get(i).getReference().getBases()[0]));
vcfWriter.add(compContexts.get(i), new byte[]{compContexts.get(i).getReference().getBases()[0]});
i++;
}
vcfWriter.addRecord(VariantContextAdaptors.toVCF(evalContexts.get(0),ref.getBase()));
vcfWriter.add(evalContexts.get(0), new byte[]{ref.getBase()});
while ( i < compContexts.size() && compContexts.get(i).getLocation().distance(evalContexts.get(0).getLocation()) <= indelWindow) {
vcfWriter.addRecord(VariantContextAdaptors.toVCF(compContexts.get(i),compContexts.get(i).getReference().getBases()[0]));
vcfWriter.add(compContexts.get(i), new byte[]{compContexts.get(i).getReference().getBases()[0]});
i++;
}
}

View File

@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
@ -602,7 +601,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
public VCFWriter reduce(MendelianViolation variant, VCFWriter writer) {
if ( variant != null ) {
trioStructure.updateHomozygosityRegions(variant,bedOutput);
writer.addRecord(VariantContextAdaptors.toVCF(variant.toVariantContext(),variant.getRefBase()));
writer.add(variant.toVariantContext(), new byte[]{variant.getRefBase()});
}
return writer;

View File

@ -1,81 +0,0 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.util.*;
/**
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
*
* @Author chartl
* @Date Apr 13, 2010
*/
public class VCFReferenceFixerWalker extends RodWalker<VCFRecord,Long> {
private VCFWriter vcfWriter;
public void initialize() {
TreeSet<String> samples = new TreeSet<String>();
SampleUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, new HashMap<Pair<String, String>, String>());
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
vcfWriter = new VCFWriter(out);
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);
}
public VCFRecord map(RefMetaDataTracker tracker, ReferenceContext context, AlignmentContext alicon) {
if ( tracker == null ) {
return null;
}
List<Object> rods = tracker.getReferenceMetaData("fixme");
Object rod = rods.get(0);
VCFRecord vcfrod = null;
if ( rod instanceof VCFRecord ) {
vcfrod = (VCFRecord) rod;
}
if (vcfrod != null) vcfrod.setReferenceBase(new String(context.getBases()));
return vcfrod;
/*
VariantContext vcon = null;
if ( rod instanceof RodVCF) {
vcon = VariantContextAdaptors.toVariantContext("fixme", (RodVCF) rod, new Allele(BaseUtils.charSeq2byteSeq(context.getBases()),true));
}
if ( vcon == null ) {
return null;
}
Set<Allele> otherAlleles = vcon.getAlternateAlleles();
VariantContext fixedContext = new VariantContext(vcon.getName(),context.getLocus(),otherAlleles,vcon.getGenotypes(),vcon.getNegLog10PError(),vcon.getFilters(),vcon.getAttributes());
return VariantContextAdaptors.toVCF(fixedContext,context.getBase());*/
}
public Long reduce(VCFRecord con, Long num) {
if ( con == null ) {
return num;
}
vcfWriter.addRecord(con);
return 1 + num;
}
public Long reduceInit() {
return 0l;
}
public void onTraversalDone(Long num){
return;
}
}

View File

@ -179,7 +179,7 @@ public class TrioGenotyperWalker extends RefWalker<VariantContext, Integer>{
if ( a == 0 )
writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc));
writer.addRecord(VariantContextAdaptors.toVCF(vc, (byte)'.'));
writer.add(vc, new byte[]{(byte)'.'});
a++;
}

View File

@ -118,7 +118,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
for ( VariantContext vc : VCs ) {
if( !vc.getName().toUpperCase().startsWith("TRUTH") ) {
if( vc.isVariant() ) {
dataManager.addAnnotations( vc, ref.getBase(), SAMPLE_NAME, isInTruthSet, isTrueVariant );
dataManager.addAnnotations( vc, SAMPLE_NAME, isInTruthSet, isTrueVariant );
}
}
}

View File

@ -1,7 +1,5 @@
package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
@ -54,7 +52,7 @@ public class AnnotationDataManager {
INDICATE_MEAN_NUM_VARS = _INDICATE_MEAN_NUM_VARS;
}
public void addAnnotations( final VariantContext vc, final byte ref, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) {
public void addAnnotations( final VariantContext vc, final String sampleName, final boolean isInTruthSet, final boolean isTrueVariant ) {
if( sampleName != null ) { // Only process variants that are found in the sample with this sampleName
if( vc.getGenotype(sampleName).isNoCall() ) { // This variant isn't found in this sample so break out
@ -62,24 +60,22 @@ public class AnnotationDataManager {
}
} // else, process all samples
VCFRecord variant = VariantContextAdaptors.toVCF(vc, ref);
// Loop over each annotation in the vcf record
final Map<String,String> infoField = variant.getInfoValues();
infoField.put("QUAL", ((Double)variant.getQual()).toString() ); // add QUAL field to annotations
for( final String annotationKey : infoField.keySet() ) {
final Map<String,Object> infoField = vc.getAttributes();
infoField.put("QUAL", ((Double)vc.getPhredScaledQual()).toString() ); // add QUAL field to annotations
for( Map.Entry<String, Object> annotation : infoField.entrySet() ) {
float value;
try {
value = Float.parseFloat( infoField.get( annotationKey ) );
value = Float.parseFloat( annotation.getValue().toString() );
} catch( NumberFormatException e ) {
continue; // Skip over annotations that aren't floats, like "DB"
}
TreeSet<AnnotationDatum> treeSet = data.get( annotationKey );
TreeSet<AnnotationDatum> treeSet = data.get( annotation.getKey() );
if( treeSet == null ) { // This annotation hasn't been seen before
treeSet = new TreeSet<AnnotationDatum>( new AnnotationDatum() ); // AnnotationDatum is a Comparator that orders variants by the value of the Annotation
data.put( annotationKey, treeSet );
data.put( annotation.getKey(), treeSet );
}
AnnotationDatum datum = new AnnotationDatum( value );
if( treeSet.contains(datum) ) { // contains() uses AnnotationDatum's equals function, so it only checks if the value field is already present
@ -88,7 +84,7 @@ public class AnnotationDataManager {
treeSet.add(datum);
}
final boolean isNovelVariant = variant.getID().equals(".");
final boolean isNovelVariant = infoField.containsKey("ID");
// Decide if the variant is a transition or transversion
if ( vc.isSNP() ) {

View File

@ -118,7 +118,7 @@ public class
for (String tests : testsEnumerations) {
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s",
2,
Arrays.asList("dc53aaf7db9f05e3b0a38bf5efe3fbbe", "cd8616aca14eb77bd90732fbfce038d5"));
Arrays.asList("dc53aaf7db9f05e3b0a38bf5efe3fbbe", "d94328f4a5f7c40e95edf2ef13f38ae0"));
executeTest("testVEWriteVCF", spec);
}
}