Two dumb oneoff walkers written to fix & annotate the Baylor indel calls (which came in sans reference, and without coding/intron annotations).
ERIC -- does the IndelAnnotator (the RefSeq lookup code I stole from IndelGentoyperV2) want to be its own Annotation inside VariantAnnotator? Is Andrey already doing this as part of adding indel calling to UG? git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3226 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2fdc1cf490
commit
84f1ccd6ac
|
|
@ -0,0 +1,113 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||
*
|
||||
* @Author chartl
|
||||
* @Date Apr 21, 2010
|
||||
*/
|
||||
public class IndelAnnotator extends RodWalker<Integer,Long>{
|
||||
@Argument(fullName="refseq", shortName="refseq",
|
||||
doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with GENOMIC/UTR/INTRON/CODING and with the gene name", required=true)
|
||||
String RefseqFileName = null;
|
||||
|
||||
private static String annGenomic = "GENOMIC";
|
||||
private static String annIntron = "INTRON";
|
||||
private static String annUTR = "UTR";
|
||||
private static String annCoding = "CODING";
|
||||
private static String annUnknown = "UNKNOWN";
|
||||
|
||||
private SeekableRODIterator refseqIterator;
|
||||
private VCFWriter vcfWriter;
|
||||
|
||||
private String getAnnotationString(RODRecordList ann) {
|
||||
if ( ann == null ) return annGenomic;
|
||||
else {
|
||||
StringBuilder b = new StringBuilder();
|
||||
|
||||
if ( rodRefSeq.isExon(ann) ) {
|
||||
if ( rodRefSeq.isCodingExon(ann) ) b.append(annCoding); // both exon and coding = coding exon sequence
|
||||
else b.append(annUTR); // exon but not coding = UTR
|
||||
} else {
|
||||
if ( rodRefSeq.isCoding(ann) ) b.append(annIntron); // not in exon, but within the coding region = intron
|
||||
else b.append(annUnknown); // we have no idea what this is. this may actually happen when we have a fully non-coding exon...
|
||||
}
|
||||
b.append('\t');
|
||||
b.append(((Transcript)ann.get(0).getUnderlyingObject()).getGeneName()); // there is at least one transcript in the list, guaranteed
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
public void initialize() {
|
||||
if ( RefseqFileName != null ) {
|
||||
ReferenceOrderedData<rodRefSeq> refseq = new ReferenceOrderedData<rodRefSeq>("refseq",
|
||||
new java.io.File(RefseqFileName), rodRefSeq.class);
|
||||
|
||||
refseqIterator = new SeekableRODIterator(new GATKFeatureIterator(refseq.iterator()));
|
||||
logger.info("Using RefSeq annotations from "+RefseqFileName);
|
||||
}
|
||||
|
||||
if ( refseqIterator == null ) logger.info("No annotations available");
|
||||
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("source", "IndelAnnotator"));
|
||||
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
|
||||
HashSet<VCFInfoHeaderLine> anno = new HashSet<VCFInfoHeaderLine>();
|
||||
anno.add(new VCFInfoHeaderLine("type",1,VCFInfoHeaderLine.INFO_TYPE.String,"Genomic interpretation (according to RefSeq)"));
|
||||
hInfo.addAll(anno);
|
||||
|
||||
vcfWriter = new VCFWriter(out);
|
||||
VCFHeader vcfHeader = new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit()));
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
}
|
||||
|
||||
public Long reduceInit() {
|
||||
return 0l;
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext con) {
|
||||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
List<Object> rods = tracker.getReferenceMetaData("variant");
|
||||
// ignore places where we don't have a variant
|
||||
if ( rods.size() == 0 )
|
||||
return 0;
|
||||
|
||||
Object variant = rods.get(0);
|
||||
|
||||
if ( variant instanceof RodVCF ) {
|
||||
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(ref.getLocus()));
|
||||
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||
annotationString = annotationString.split("\\s+")[0];
|
||||
((RodVCF) variant).getRecord().addInfoField("type",annotationString);
|
||||
vcfWriter.addRecord(((RodVCF) variant).getRecord());
|
||||
} else {
|
||||
throw new StingException("This one-off walker only deals with VCF files.");
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Long reduce(Integer i, Long j) {
|
||||
return i + j;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||
*
|
||||
* @Author chartl
|
||||
* @Date Apr 13, 2010
|
||||
*/
|
||||
public class VCFReferenceFixerWalker extends RodWalker<VCFRecord,Long> {
|
||||
|
||||
private VCFWriter vcfWriter;
|
||||
|
||||
public void initialize() {
|
||||
TreeSet<String> samples = new TreeSet<String>();
|
||||
SampleUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, new HashMap<Pair<String, String>, String>());
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
|
||||
vcfWriter = new VCFWriter(out);
|
||||
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
}
|
||||
|
||||
public VCFRecord map(RefMetaDataTracker tracker, ReferenceContext context, AlignmentContext alicon) {
|
||||
if ( tracker == null ) {
|
||||
return null;
|
||||
}
|
||||
List<Object> rods = tracker.getReferenceMetaData("fixme");
|
||||
Object rod = rods.get(0);
|
||||
RodVCF vcfrod = null;
|
||||
if ( rod instanceof RodVCF ) {
|
||||
vcfrod = (RodVCF) rod;
|
||||
}
|
||||
|
||||
VCFRecord rec = vcfrod.getRecord();
|
||||
rec.setReferenceBase(new String(BaseUtils.charSeq2byteSeq(context.getBases())));
|
||||
return rec;
|
||||
|
||||
/*
|
||||
VariantContext vcon = null;
|
||||
if ( rod instanceof RodVCF) {
|
||||
vcon = VariantContextAdaptors.toVariantContext("fixme", (RodVCF) rod, new Allele(BaseUtils.charSeq2byteSeq(context.getBases()),true));
|
||||
}
|
||||
if ( vcon == null ) {
|
||||
return null;
|
||||
}
|
||||
Set<Allele> otherAlleles = vcon.getAlternateAlleles();
|
||||
VariantContext fixedContext = new VariantContext(vcon.getName(),context.getLocus(),otherAlleles,vcon.getGenotypes(),vcon.getNegLog10PError(),vcon.getFilters(),vcon.getAttributes());
|
||||
return VariantContextAdaptors.toVCF(fixedContext,context.getBase());*/
|
||||
}
|
||||
|
||||
public Long reduce(VCFRecord con, Long num) {
|
||||
if ( con == null ) {
|
||||
return num;
|
||||
}
|
||||
|
||||
vcfWriter.addRecord(con);
|
||||
return 1 + num;
|
||||
}
|
||||
|
||||
public Long reduceInit() {
|
||||
return 0l;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Long num){
|
||||
return;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue