Takes a refGene table ( -B arg must be: -B refgene,AnnotatorInfoTable,/path/to/refgene_file.txt) and generates the big table of nucleotides containing annotations for each possible variant at each transcript position (eg. 4 variants for each position).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3238 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
weisburd 2010-04-22 12:11:19 +00:00
parent 653e08c0b6
commit e643a9e7a5
1 changed files with 642 additions and 0 deletions

View File

@ -0,0 +1,642 @@
package org.broadinstitute.sting.playground.gatk.walkers.annotator;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.Collection;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.AnnotatorROD;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
/**
* Takes a refGene table ( -B arg must be: -B refgene,AnnotatorInfoTable,/path/to/refgene_file.txt) and generates the big table of nucleotides containing
* annotations for each possible variant at each transcript position (eg. 4 variants for each position).
*/
@Reference(window=@Window(start=-4,stop=4))
@By(DataSource.REFERENCE)
@Requires(value={DataSource.REFERENCE}, referenceMetaData={ @RMD(name="refgene",type=AnnotatorROD.class) })
public class TranscriptToInfo extends RodWalker<Integer, Integer>
{
/**
RefGene column names:
# bin 182 smallint(5) unsigned range Indexing field to speed chromosome range queries.
# name NM_021649 varchar(255) values Name of gene (usually transcript_id from GTF)
# chrom chr5 varchar(255) values Reference sequence chromosome or scaffold
# strand - char(1) values + or - for strand
# txStart 114942238 int(10) unsigned range Transcription start position
# txEnd 114966008 int(10) unsigned range Transcription end position
# cdsStart 114944144 int(10) unsigned range Coding region start
# cdsEnd 114944852 int(10) unsigned range Coding region end
# exonCount 2 int(10) unsigned range Number of exons
# exonStarts 114942238,114965692, longblob Exon start positions
# exonEnds 114944911,114966008, longblob Exon end positions
# id 0 int(10) unsigned range Unique identifier
# name2 TICAM2 varchar(255) values Alternate name (e.g. gene_id from GTF)
# cdsStartStat cmpl enum('none','unk','incmpl','cmpl') values enum('none','unk','incmpl','cmpl')
# cdsEndStat cmpl enum('none','unk','incmpl','cmpl') values enum('none','unk','incmpl','cmpl')
# exonFrames 0,-1, longblob Exon frame {0,1,2}, or -1 if no frame for exon
*/
public static final int OUTPUT_STREAM_BUFFER_SIZE = 8*1024*1024;
/** Output columns */
public static final String OUTPUT_SORT_KEY = "00000_SORT_KEY";
public static final String OUTPUT_CHRPOS = AnnotatorROD.CHRPOS_COLUMN;
public static final String OUTPUT_HAPLOTYPE_REFERENCE = AnnotatorROD.HAPLOTYPE_REFERENCE_COLUMN;
public static final String OUTPUT_HAPLOTYPE_ALTERNATE = AnnotatorROD.HAPLOTYPE_ALTERNATE_COLUMN;
public static final String OUTPUT_HAPLOTYPE_STRAND = AnnotatorROD.HAPLOTYPE_STRAND_COLUMN;
public static final String OUTPUT_GENE_NAME = "geneName"; //eg. NDUFS2
public static final String OUTPUT_TRANSCRIPT_ACCESSION = "transcriptId"; //eg. NM_001212414.4
//public static final String OUTPUT_IN_TRANSCRIPT = "inTranscript"; //eg. true
public static final String OUTPUT_IN_CODING_REGION = "inCodingRegion"; //eg. true
public static final String OUTPUT_FRAME = "frame"; //eg. 0,1,2
public static final String OUTPUT_POSITION_TYPE = "positionType"; //eg. utr5, cds, utr3, intron, intergenic
public static final String OUTPUT_MRNA_COORD = "mrnaCoord"; //1-based offset within the transcript
public static final String OUTPUT_CODING_COORD = "codingCoord"; //1-based offset within the cds region
public static final String OUTPUT_SPLICE_DISTANCE = "minSpliceDistance"; //eg. 40
public static final String OUTPUT_CODON_NUMBER = "codonCoord"; //eg. 20
public static final String OUTPUT_REFERENCE_CODON = "referenceCodon";
public static final String OUTPUT_REFERENCE_AA = "referenceAA";
public static final String OUTPUT_VARIANT_CODON = "variantCodon";
public static final String OUTPUT_VARIANT_AA = "variantAA";
public static final String OUTPUT_CHANGES_AMINO_ACID = "changesAA"; //eg. true
public static final String OUTPUT_CODING_COORD_STR = "coding_coord_str";
public static final String OUTPUT_PROTEIN_COORD_STR = "protein_coord_str";
public static final String OUTPUT_INTRON_COORD_STR = "intron_coord_str";
public static final String[] OUTPUT_COLUMN_NAMES = new String[] {
OUTPUT_SORT_KEY,
OUTPUT_CHRPOS,
OUTPUT_HAPLOTYPE_REFERENCE,
OUTPUT_HAPLOTYPE_ALTERNATE,
OUTPUT_HAPLOTYPE_STRAND,
OUTPUT_GENE_NAME,
OUTPUT_TRANSCRIPT_ACCESSION,
OUTPUT_POSITION_TYPE,
OUTPUT_FRAME,
OUTPUT_MRNA_COORD,
OUTPUT_CODING_COORD,
OUTPUT_CODON_NUMBER,
OUTPUT_SPLICE_DISTANCE,
OUTPUT_REFERENCE_CODON,
OUTPUT_REFERENCE_AA,
OUTPUT_VARIANT_CODON,
OUTPUT_VARIANT_AA,
OUTPUT_CHANGES_AMINO_ACID,
OUTPUT_CODING_COORD_STR,
OUTPUT_PROTEIN_COORD_STR,
OUTPUT_INTRON_COORD_STR,
OUTPUT_IN_CODING_REGION,
};
/**
* Container for all data fields from a single refGene.txt row.
*/
static class RefGeneTranscriptRecord
{
public static final String REFGENE_NAME = "name"; //eg. NM_021649
public static final String REFGENE_STRAND = "strand"; //eg. +
public static final String REFGENE_TXSTART = "txStart";
public static final String REFGENE_TXEND = "txEnd";
public static final String REFGENE_CDS_START = "cdsStart";
public static final String REFGENE_CDS_END = "cdsEnd";
public static final String REFGENE_EXON_COUNT = "exonCount";
public static final String REFGENE_EXON_STARTS = "exonStarts";
public static final String REFGENE_EXON_ENDS = "exonEnds";
public static final String REFGENE_EXON_FRAMES = "exonFrames";
public static final String REFGENE_NAME2 = "name2"; //eg. TICAM2
/**
* This StringBuffer accumulates the entire transcript sequence.
* This buffer is used instead of using the GATK window mechanism
* because arbitrary-length look-aheads and look-behinds are needed to deal
* with codons that span splice-junctions. The window mechanism
* requires hard-coding the window size, which would translate into a
* limit on maximum supported intron size. To avoid this, the
* sequence is accumulated as the transcript is scanned left-to-right.
* Then, all calculations are performed at the end.
*/
public StringBuilder txSequence; //the sequence of the entire transcript in order from 5' to 3'
public StringBuilder cdsSequence; //the protein coding sequence (with introns removed) in order from 5' to 3'
public boolean positiveStrand;
public String name; //eg. NM_021649
public String name2; //eg. TICAM2
public String txChrom; //The chromosome
public long txStart;
public long txEnd;
public long cdsStart;
public long cdsEnd;
public long[] exonStarts;
public long[] exonEnds;
public int[] exonFrames;
private AnnotatorROD rod;
public RefGeneTranscriptRecord(final AnnotatorROD refGeneRod) {
this.rod = refGeneRod;
//String binStr = rod.get("bin");
//String idStr = refGeneRod.get("id"); //int(10) unsigned range Unique identifier ( usually 0 for some reason - even for translated )
String strandStr = refGeneRod.get("strand");
if(strandStr.equals("+")) {
positiveStrand = true;
} else if(strandStr.equals("-")) {
positiveStrand = false;
} else {
throw new IllegalArgumentException("refGene record contains unexpected strand value: \"" + strandStr + "\"");
}
name = refGeneRod.get("name");
name2 = refGeneRod.get("name2");
//String txStartStr = refGeneRod.get(REFGENE_TXSTART); //These fields are used to generate column 1 of the ROD file (eg. they get turned into chr:txStart-txStop)
//String txEndStr = refGeneRod.get(REFGENE_TXEND);
final GenomeLoc loc = refGeneRod.getLocation();
txChrom = loc.getContig();
txStart = loc.getStart();
txEnd = loc.getStop();
txSequence = new StringBuilder( (int) (txEnd - txStart + 1) ); //the sequence of the entire transcript in order from 5' to 3'
cdsSequence = new StringBuilder( (int) (cdsEnd - cdsStart + 1) ); //TODO reduce init size by size of introns
String cdsStartStr = refGeneRod.get(REFGENE_CDS_START);
String cdsEndStr = refGeneRod.get(REFGENE_CDS_END);
cdsStart = Long.parseLong(cdsStartStr);
cdsEnd = Long.parseLong(cdsEndStr);
String exonCountStr = refGeneRod.get(REFGENE_EXON_COUNT);
String exonStartsStr = refGeneRod.get(REFGENE_EXON_STARTS);
String exonEndsStr = refGeneRod.get(REFGENE_EXON_ENDS);
String exonFramesStr = refGeneRod.get(REFGENE_EXON_FRAMES);
String[] exonStartStrs = exonStartsStr.split(",");
String[] exonEndStrs = exonEndsStr.split(",");
String[] exonFrameStrs = exonFramesStr.split(",");
int exonCount = Integer.parseInt(exonCountStr);
if(exonCount != exonStartStrs.length || exonCount != exonEndStrs.length || exonCount != exonFrameStrs.length)
{
throw new RuntimeException("exonCount != exonStarts.length || exonCount != exonEnds.length || exonCount != exonFrames.length. Exon starts: " + exonStartsStr + ", Exon ends: " + exonEndsStr + ", Exon frames: " + exonFramesStr + ", Exon count: " + exonCountStr +". refGeneRod = " + refGeneRod);
}
exonStarts = new long[exonCount];
exonEnds = new long[exonCount];
exonFrames = new int[exonCount];
for(int i = 0; i < exonCount; i++) {
exonStarts[i] = Long.parseLong(exonStartStrs[i]);
exonEnds[i] = Long.parseLong(exonEndStrs[i]);
exonFrames[i] = Integer.parseInt(exonFrameStrs[i]);
}
}
/**
* Takes a genomic position on the same contig as the transcript, and
* returns true if this position falls within an exon.
*/
public boolean isWithinExon(final long genomPosition) {
for(int i = 0; i < exonStarts.length; i++) {
final long curStart = exonStarts[i];
if(genomPosition < curStart) {
return false;
}
final long curStop = exonEnds[i];
if(genomPosition <= curStop) {
return true;
}
}
return false;
}
/**
* Computes the distance to the nearest splice-site.
* Returns 1 when down-stream of splice-juction, -1 when upstream
*/
public int distanceToSpliceSite(final long genomPosition) {
int prevDistance = Integer.MAX_VALUE;
int curDistance;
for(int i = 0; i < exonStarts.length; i++) {
final long curStart = exonStarts[i];
curDistance = (int) (curStart-genomPosition);
if(genomPosition < curStart) {
return Math.min(prevDistance, curDistance);
} else {
prevDistance = (int) (genomPosition - curStart) + 1;
}
final long curStop = exonEnds[i];
curDistance = (int) (curStop-genomPosition) + 1;
if(genomPosition <= curStop) {
return Math.min(prevDistance, curDistance);
} else {
prevDistance = (int) (genomPosition - curStop);
}
}
return prevDistance; //out of exons. return genomPosition-curStop
}
/**
* Returns true if this is a coding transcript (eg. is translated
* into proteins). Returns false for non-coding RNA.
*/
public boolean isProteinCodingTranscript() {
return cdsStart < cdsEnd;
}
@Override
public String toString() {
return rod.toString();
}
}
protected int transcriptsProcessedCounter = 0;
/** Possible values for the "POSITION_TYPE" output column. */
enum PositionType {
intergenic, intron, utr5, CDS, utr3, non_coding_exon, non_coding_intron
}
/** Output stream for computed results */
private PrintStream outputStream;
/**
* Prepare the output file and the list of available features.
*/
public void initialize() {
//init the output file
String outputFilename = getToolkit().getArguments().outFileName;
if(outputFilename == null ) {
outputStream = System.out;
} else {
try {
outputStream = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFilename), OUTPUT_STREAM_BUFFER_SIZE));
} catch(Exception e) {
throw new StingException("Unable to open output file for writing: " + outputFilename);
}
}
outputStream.println(Utils.join(AnnotatorROD.DELIMITER, OUTPUT_COLUMN_NAMES));
}
/**
* Initialize the number of loci processed to zero.
*
* @return 0
*/
public Integer reduceInit() { return 0; }
/**
* We want reads that span deletions
*
* @return true
*/
public boolean includeReadsWithDeletionAtLoci() { return true; }
/**
* For each site of interest, generate the appropriate fields.
*
* @param tracker the meta-data tracker
* @param ref the reference base
* @param context the context for the given locus
* @return 1 if the locus was successfully processed, 0 if otherwise
*/
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null )
return 0;
Collection<RODRecordList> rods = tracker.getBoundRodTracks();
if(rods.size() == 0) {
//if there's nothing overlapping this locus, skip it.
return 0;
}
final List<Object> refGeneRODs = (List<Object>) tracker.getReferenceMetaData("refgene");
for(Object refGeneRodObject : refGeneRODs) {
if(! (refGeneRodObject instanceof AnnotatorROD) ) {
throw new RuntimeException("The 'refgene' -B arg must have the type: 'AnnotatorInputTable'.");
}
AnnotatorROD refGeneRod = (AnnotatorROD) refGeneRodObject;
RefGeneTranscriptRecord parsedRefGeneRod = (RefGeneTranscriptRecord) refGeneRod.getTemporaryAttribute("parsedRefGeneRod");
if( parsedRefGeneRod == null ) {
parsedRefGeneRod = new RefGeneTranscriptRecord(refGeneRod);
refGeneRod.setTemporaryAttribute("parsedRefGeneRod", parsedRefGeneRod);
}
if(parsedRefGeneRod.positiveStrand) {
parsedRefGeneRod.txSequence.append(ref.getBase());
} else {
parsedRefGeneRod.txSequence.insert(0, ref.getBase());
}
final long position = ref.getLocus().getStart();
if(parsedRefGeneRod.isProteinCodingTranscript() && position >= parsedRefGeneRod.cdsStart && position <= parsedRefGeneRod.cdsEnd && parsedRefGeneRod.isWithinExon(position) ) {
if(parsedRefGeneRod.positiveStrand) {
parsedRefGeneRod.cdsSequence.append(ref.getBase());
} else {
parsedRefGeneRod.cdsSequence.insert(0, ref.getBase());
}
}
if(position == parsedRefGeneRod.txEnd) {
//we've reached the end of the transcript - compute all data and write it out.
generateOutputRecordsForROD(parsedRefGeneRod);
transcriptsProcessedCounter++;
if(transcriptsProcessedCounter % 100 == 0) {
System.err.println(new Date() + ": " + transcriptsProcessedCounter + " transcripts processed");
}
}
}
return 1;
}
private static final char[] ALLELES = {'A','C','G','T'};
private void generateOutputRecordsForROD(RefGeneTranscriptRecord parsedRefGeneRod) {
//Transcripts that don't produce proteins are indicated in refGene by cdsStart == cdsEnd
//These will be handled by generating only one record, with haplotypeAlternate == "*".
final boolean isProteinCodingTranscript = parsedRefGeneRod.isProteinCodingTranscript();
final boolean positiveStrand = parsedRefGeneRod.positiveStrand; //shorten the name
if(isProteinCodingTranscript && parsedRefGeneRod.cdsSequence.length() % 3 != 0) {
System.err.println("WARNING: The following record has " + parsedRefGeneRod.cdsSequence.length() + " nucleotides in its CDS region, which is not divisible by 3. Skipping... \n" + parsedRefGeneRod.toString());
//TODO figure out why all CDS lengths aren't in multiples of 3 nucleotides - Sarah says its ok to thow these out.
return;
}
//here, 5' and 3' are according to the DNA (not on the RNA)
int frame = 0; //the frame of the current position
int txOffset_from5 = 1; //goes from txStart 5' to txEnd 3' for both + and - strand
int proteinOffset_from5 = 1; //goes from cdsStart 5' to cdsEnd 3' for both + and - strand
int codonOffset_from5 = 0; //goes from cdsStart 5' to cdsEnd 3' for both + and - strand - counts the number of codons (this count is 1-based even though its set to 0 here.)
char[] currentCodon_5to3 = null; //holds the current RNA codon - 5' to 3'
final long txStart_5prime = positiveStrand ? parsedRefGeneRod.txStart : parsedRefGeneRod.txEnd; //1-based, inclusive
final long txEnd_3prime = positiveStrand ? parsedRefGeneRod.txEnd : parsedRefGeneRod.txStart; //1-based, inclusive
final int increment_5to3 = positiveStrand ? 1 : -1;
final int strandSign = increment_5to3; //alias
final long cdsStart_5prime = positiveStrand ? parsedRefGeneRod.cdsStart : parsedRefGeneRod.cdsEnd; //1-based, inclusive
final long cdsEnd_3prime = positiveStrand ? parsedRefGeneRod.cdsEnd : parsedRefGeneRod.cdsStart ; //1-based, inclusive
//compute chromosome key (later used to sort the output file)
boolean mitochondrial = false;
long chromKey;
final char lastChromChar = parsedRefGeneRod.txChrom.charAt(parsedRefGeneRod.txChrom.length() -1);
switch( Character.toLowerCase(lastChromChar) ) {
case 'm':
chromKey = 0;
mitochondrial = true;
break;
case 'x':
chromKey = 23;
break;
case 'y':
chromKey = 24;
break;
default:
chromKey = Integer.parseInt(parsedRefGeneRod.txChrom.substring(3));
break;
}
chromKey++; //shift so there's no zero (otherwise, multiplication is screwed up in the next step)
chromKey *= 100000000000L;
for(long txCoord_5to3 = txStart_5prime; txCoord_5to3 != txEnd_3prime + increment_5to3; txCoord_5to3 += increment_5to3)
{
//get the reference sequence
final char haplotypeReference = parsedRefGeneRod.txSequence.charAt( txOffset_from5 - 1 );
//whether the current txCoord_5to3 is within an exon
final boolean isWithinExon = parsedRefGeneRod.isWithinExon(txCoord_5to3); //TODO if necessary, this can be sped up by keeping track of current exon/intron
//figure out what region the current position is in.
PositionType positionType;
if(isProteinCodingTranscript)
{
if(isWithinExon)
{
if( strandSign*(txCoord_5to3 - cdsStart_5prime) < 0 ) {
positionType = PositionType.utr5;
} else if( strandSign*(txCoord_5to3 - cdsEnd_3prime) > 0 ) {
positionType = PositionType.utr3;
} else {
positionType = PositionType.CDS;
}
} else {
positionType = PositionType.intron;
}
} else {
if(isWithinExon) {
positionType = PositionType.non_coding_exon;
} else {
positionType = PositionType.non_coding_intron;
}
}
//compute current codon
if(positionType == PositionType.CDS && frame == 0)
{
if(currentCodon_5to3 == null) {
currentCodon_5to3 = new char[3];
}
codonOffset_from5++;
currentCodon_5to3[0] = parsedRefGeneRod.cdsSequence.charAt( proteinOffset_from5 - 1 ); //subtract 1 to go to zero-based coords
currentCodon_5to3[1] = parsedRefGeneRod.cdsSequence.charAt( proteinOffset_from5 );
currentCodon_5to3[2] = parsedRefGeneRod.cdsSequence.charAt( proteinOffset_from5 + 1);
if(!positiveStrand) {
currentCodon_5to3[0] = BaseUtils.simpleComplement(currentCodon_5to3[0]);
currentCodon_5to3[1] = BaseUtils.simpleComplement(currentCodon_5to3[1]);
currentCodon_5to3[2] = BaseUtils.simpleComplement(currentCodon_5to3[2]);
}
}
final String distanceToSpliceSiteString = Integer.toString(parsedRefGeneRod.distanceToSpliceSite(txCoord_5to3));
for(final char haplotypeAlternate : ALLELES )
{
final LinkedHashMap<String, String> outputLineFields = new LinkedHashMap<String, String>();
outputLineFields.put(OUTPUT_SORT_KEY, Long.toString( chromKey + txCoord_5to3) );
outputLineFields.put(OUTPUT_CHRPOS, parsedRefGeneRod.txChrom + ":" + txCoord_5to3);
outputLineFields.put(OUTPUT_HAPLOTYPE_REFERENCE, Character.toString( haplotypeReference ) );
outputLineFields.put(OUTPUT_HAPLOTYPE_ALTERNATE, isProteinCodingTranscript ? Character.toString( haplotypeAlternate ) : "*");
outputLineFields.put(OUTPUT_HAPLOTYPE_STRAND, positiveStrand ? "+" : "-");
outputLineFields.put(OUTPUT_GENE_NAME, parsedRefGeneRod.name2 );
outputLineFields.put(OUTPUT_TRANSCRIPT_ACCESSION, parsedRefGeneRod.name );
outputLineFields.put(OUTPUT_POSITION_TYPE, positionType.toString() );
outputLineFields.put(OUTPUT_MRNA_COORD, Integer.toString(txOffset_from5) );
outputLineFields.put(OUTPUT_SPLICE_DISTANCE, distanceToSpliceSiteString );
if(isProteinCodingTranscript)
{
outputLineFields.put(OUTPUT_IN_CODING_REGION, Boolean.toString(positionType == PositionType.CDS) );
if(isWithinExon) {
outputLineFields.put(OUTPUT_CODING_COORD, Integer.toString(proteinOffset_from5) );
if(positionType == PositionType.CDS) {
final String referenceCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2];
outputLineFields.put(OUTPUT_FRAME, Integer.toString(frame) );
outputLineFields.put(OUTPUT_CODON_NUMBER, Integer.toString(codonOffset_from5) );
outputLineFields.put(OUTPUT_PROTEIN_COORD_STR, "p."+ Integer.toString(codonOffset_from5) );
outputLineFields.put(OUTPUT_CODING_COORD_STR, "c."+ Integer.toString(proteinOffset_from5) );
outputLineFields.put(OUTPUT_REFERENCE_CODON, referenceCodon );
final String refAA = AminoAcidTable.getAA3LetterCode(referenceCodon, mitochondrial);
outputLineFields.put(OUTPUT_REFERENCE_AA, refAA);
final char temp = currentCodon_5to3[frame];
currentCodon_5to3[frame] = haplotypeAlternate;
final String variantCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2];
final String variantAA = AminoAcidTable.getAA3LetterCode(variantCodon, mitochondrial);
outputLineFields.put(OUTPUT_VARIANT_CODON, variantCodon );
outputLineFields.put(OUTPUT_VARIANT_AA, variantAA);
currentCodon_5to3[frame] = temp;
outputLineFields.put(OUTPUT_CHANGES_AMINO_ACID, Boolean.toString(!refAA.equals(variantAA)));
}
}
}
//write out the record
StringBuilder outputLine = new StringBuilder();
for( String column : OUTPUT_COLUMN_NAMES) {
if(outputLine.length() != 0) {
outputLine.append(AnnotatorROD.DELIMITER);
}
String value = outputLineFields.get(column);
outputLine.append(value != null ? value : "");
}
outputStream.println(outputLine);
if( !isProteinCodingTranscript ) {
//need only one record for this position, instead of 4 (one for each allele)
break;
}
} //ALLELE for-loop
txOffset_from5++;
if(positionType == PositionType.CDS) {
proteinOffset_from5++;
frame = (frame + 1) % 3;
}
} // l for-loop
} //method close
/**
* Increment the number of loci processed.
*
* @param value result of the map.
* @param sum accumulator for the reduce.
* @return the new number of loci processed.
*/
public Integer reduce(Integer value, Integer sum) {
return sum + value;
}
/**
* Tell the user the number of loci processed and close out the new variants file.
*
* @param result the number of loci seen.
*/
public void onTraversalDone(Integer result) {
out.printf("Processed %d loci.\n", result);
outputStream.close();
}
}