Two dumb one-off walkers written to fix & annotate the Baylor indel calls (which came in sans reference, and without coding/intron annotations).
ERIC -- does the IndelAnnotator (the RefSeq lookup code I stole from IndelGenotyperV2) want to be its own Annotation inside VariantAnnotator? Is Andrey already doing this as part of adding indel calling to UG? git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3226 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2fdc1cf490
commit
84f1ccd6ac
|
|
@ -0,0 +1,113 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||||
|
*
|
||||||
|
* @Author chartl
|
||||||
|
* @Date Apr 21, 2010
|
||||||
|
*/
|
||||||
|
public class IndelAnnotator extends RodWalker<Integer,Long>{
|
||||||
|
@Argument(fullName="refseq", shortName="refseq",
|
||||||
|
doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with GENOMIC/UTR/INTRON/CODING and with the gene name", required=true)
|
||||||
|
String RefseqFileName = null;
|
||||||
|
|
||||||
|
private static String annGenomic = "GENOMIC";
|
||||||
|
private static String annIntron = "INTRON";
|
||||||
|
private static String annUTR = "UTR";
|
||||||
|
private static String annCoding = "CODING";
|
||||||
|
private static String annUnknown = "UNKNOWN";
|
||||||
|
|
||||||
|
private SeekableRODIterator refseqIterator;
|
||||||
|
private VCFWriter vcfWriter;
|
||||||
|
|
||||||
|
private String getAnnotationString(RODRecordList ann) {
|
||||||
|
if ( ann == null ) return annGenomic;
|
||||||
|
else {
|
||||||
|
StringBuilder b = new StringBuilder();
|
||||||
|
|
||||||
|
if ( rodRefSeq.isExon(ann) ) {
|
||||||
|
if ( rodRefSeq.isCodingExon(ann) ) b.append(annCoding); // both exon and coding = coding exon sequence
|
||||||
|
else b.append(annUTR); // exon but not coding = UTR
|
||||||
|
} else {
|
||||||
|
if ( rodRefSeq.isCoding(ann) ) b.append(annIntron); // not in exon, but within the coding region = intron
|
||||||
|
else b.append(annUnknown); // we have no idea what this is. this may actually happen when we have a fully non-coding exon...
|
||||||
|
}
|
||||||
|
b.append('\t');
|
||||||
|
b.append(((Transcript)ann.get(0).getUnderlyingObject()).getGeneName()); // there is at least one transcript in the list, guaranteed
|
||||||
|
return b.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
if ( RefseqFileName != null ) {
|
||||||
|
ReferenceOrderedData<rodRefSeq> refseq = new ReferenceOrderedData<rodRefSeq>("refseq",
|
||||||
|
new java.io.File(RefseqFileName), rodRefSeq.class);
|
||||||
|
|
||||||
|
refseqIterator = new SeekableRODIterator(new GATKFeatureIterator(refseq.iterator()));
|
||||||
|
logger.info("Using RefSeq annotations from "+RefseqFileName);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( refseqIterator == null ) logger.info("No annotations available");
|
||||||
|
|
||||||
|
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||||
|
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||||
|
hInfo.add(new VCFHeaderLine("source", "IndelAnnotator"));
|
||||||
|
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
|
||||||
|
HashSet<VCFInfoHeaderLine> anno = new HashSet<VCFInfoHeaderLine>();
|
||||||
|
anno.add(new VCFInfoHeaderLine("type",1,VCFInfoHeaderLine.INFO_TYPE.String,"Genomic interpretation (according to RefSeq)"));
|
||||||
|
hInfo.addAll(anno);
|
||||||
|
|
||||||
|
vcfWriter = new VCFWriter(out);
|
||||||
|
VCFHeader vcfHeader = new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit()));
|
||||||
|
vcfWriter.writeHeader(vcfHeader);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long reduceInit() {
|
||||||
|
return 0l;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext con) {
|
||||||
|
if ( tracker == null )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
List<Object> rods = tracker.getReferenceMetaData("variant");
|
||||||
|
// ignore places where we don't have a variant
|
||||||
|
if ( rods.size() == 0 )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
Object variant = rods.get(0);
|
||||||
|
|
||||||
|
if ( variant instanceof RodVCF ) {
|
||||||
|
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(ref.getLocus()));
|
||||||
|
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||||
|
annotationString = annotationString.split("\\s+")[0];
|
||||||
|
((RodVCF) variant).getRecord().addInfoField("type",annotationString);
|
||||||
|
vcfWriter.addRecord(((RodVCF) variant).getRecord());
|
||||||
|
} else {
|
||||||
|
throw new StingException("This one-off walker only deals with VCF files.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long reduce(Integer i, Long j) {
|
||||||
|
return i + j;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,83 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||||
|
*
|
||||||
|
* @Author chartl
|
||||||
|
* @Date Apr 13, 2010
|
||||||
|
*/
|
||||||
|
public class VCFReferenceFixerWalker extends RodWalker<VCFRecord,Long> {
|
||||||
|
|
||||||
|
private VCFWriter vcfWriter;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
TreeSet<String> samples = new TreeSet<String>();
|
||||||
|
SampleUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, new HashMap<Pair<String, String>, String>());
|
||||||
|
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||||
|
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||||
|
hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
|
||||||
|
vcfWriter = new VCFWriter(out);
|
||||||
|
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||||
|
vcfWriter.writeHeader(vcfHeader);
|
||||||
|
}
|
||||||
|
|
||||||
|
public VCFRecord map(RefMetaDataTracker tracker, ReferenceContext context, AlignmentContext alicon) {
|
||||||
|
if ( tracker == null ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
List<Object> rods = tracker.getReferenceMetaData("fixme");
|
||||||
|
Object rod = rods.get(0);
|
||||||
|
RodVCF vcfrod = null;
|
||||||
|
if ( rod instanceof RodVCF ) {
|
||||||
|
vcfrod = (RodVCF) rod;
|
||||||
|
}
|
||||||
|
|
||||||
|
VCFRecord rec = vcfrod.getRecord();
|
||||||
|
rec.setReferenceBase(new String(BaseUtils.charSeq2byteSeq(context.getBases())));
|
||||||
|
return rec;
|
||||||
|
|
||||||
|
/*
|
||||||
|
VariantContext vcon = null;
|
||||||
|
if ( rod instanceof RodVCF) {
|
||||||
|
vcon = VariantContextAdaptors.toVariantContext("fixme", (RodVCF) rod, new Allele(BaseUtils.charSeq2byteSeq(context.getBases()),true));
|
||||||
|
}
|
||||||
|
if ( vcon == null ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
Set<Allele> otherAlleles = vcon.getAlternateAlleles();
|
||||||
|
VariantContext fixedContext = new VariantContext(vcon.getName(),context.getLocus(),otherAlleles,vcon.getGenotypes(),vcon.getNegLog10PError(),vcon.getFilters(),vcon.getAttributes());
|
||||||
|
return VariantContextAdaptors.toVCF(fixedContext,context.getBase());*/
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long reduce(VCFRecord con, Long num) {
|
||||||
|
if ( con == null ) {
|
||||||
|
return num;
|
||||||
|
}
|
||||||
|
|
||||||
|
vcfWriter.addRecord(con);
|
||||||
|
return 1 + num;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long reduceInit() {
|
||||||
|
return 0l;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(Long num){
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue