diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java index 34a04a02d..10893ce57 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java @@ -27,7 +27,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod // Reference sequence chromosome or scaffold // Start and stop positions in chrom - public String name; // Reference SNP identifier or Affy SNP name + public String RS_ID; // Reference SNP identifier or Affy SNP name public String strand; // Which DNA strand contains the observed alleles public String refBases; // the reference base according to NCBI, in the dbSNP file @@ -206,19 +206,22 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod // formatting // // ---------------------------------------------------------------------- + + public String getRS_ID() { return RS_ID; } + public String toString() { return String.format("%s\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d", getLocation().getContig(), getLocation().getStart(), getLocation().getStop() + 1, - name, strand, refBases, observed, molType, + RS_ID, strand, refBases, observed, molType, varType, validationStatus, avHet, avHetSE, func, locType, weight); } public String toSimpleString() { - return String.format("%s:%s:%s", name, observed, strand); + return String.format("%s:%s:%s", RS_ID, observed, strand); } public String toMediumString() { - String s = String.format("%s:%s:%s", getLocation().toString(), name, Utils.join("",this.getAlleleList())); + String s = String.format("%s:%s:%s", getLocation().toString(), RS_ID, Utils.join("",this.getAlleleList())); if (isSNP()) s += ":SNP"; if (isIndel()) s += ":Indel"; if (isHapmap()) s += ":Hapmap"; @@ -229,7 +232,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod public String repl() { return String.format("%d\t%s\t%d\t%d\t%s\t0\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d", 585, getLocation().getContig(), getLocation().getStart() - 1, getLocation().getStop(), - name, strand, refBases, refBases, observed, molType, + RS_ID, strand, refBases, refBases, observed, molType, varType, validationStatus, avHet, avHetSE, func, locType, weight); } @@ -240,7 +243,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod long stop = Long.parseLong(parts[3]) + 1; // The final is 0 based loc = GenomeLocParser.parseGenomeLoc(contig, start, Math.max(start, stop - 1)); - name = parts[4]; + RS_ID = parts[4]; strand = parts[6]; refBases = parts[7]; if (strand == "-") diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index c0045d83f..9443e476e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.*; @@ -52,6 +53,11 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode if ( locusdata instanceof ConfidenceBacked ) { ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); } + if ( locusdata instanceof IDBacked ) { + rodDbSNP dbsnp = getDbSNP(tracker); + if ( dbsnp != null ) + ((IDBacked)locusdata).setID(dbsnp.getRS_ID()); + } if ( locusdata instanceof SLODBacked ) { // calculate strand score diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index 13d7f3312..6b2be5560 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Pair; @@ -99,6 +100,25 @@ public abstract class GenotypeCalculationModel implements Cloneable { AlignmentContext context, DiploidGenotypePriors priors); + /** + * @param tracker rod data + * + * @return the dbsnp rod if there is one at this position + */ + public static rodDbSNP getDbSNP(RefMetaDataTracker tracker) { + return rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null)); + } + + /** + * Determine whether we're at a Hapmap site + * + * @param tracker the meta data tracker + * + * @return true if we're at a Hapmap site, false if otherwise + */ + private static boolean isHapmapSite(RefMetaDataTracker tracker) { + return tracker.getTrackData("hapmap", null) != null; + } /** * Create the mapping from sample to alignment contexts. diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index a5dedad3c..f9a3f1e0c 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.*; @@ -49,7 +50,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo // run joint estimation for the full GL contexts initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.OVERALL); calculateAlleleFrequencyPosteriors(ref, context.getLocation()); - return createCalls(ref, contexts, context.getLocation()); + return createCalls(tracker, ref, contexts, context.getLocation()); } private void initializeAlleleFrequencies(int numSamples) { @@ -275,7 +276,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo verboseWriter.println(); } - private Pair, GenotypeLocusData> createCalls(char ref, HashMap contexts, GenomeLoc loc) { + private Pair, GenotypeLocusData> createCalls(RefMetaDataTracker tracker, char ref, HashMap contexts, GenomeLoc loc) { // first, find the alt allele with maximum confidence int indexOfMax = 0; char baseOfMax = ref; @@ -333,7 +334,12 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo if ( locusdata instanceof AlleleFrequencyBacked ) { ((AlleleFrequencyBacked)locusdata).setAlleleFrequency(bestAFguess); } - if ( locusdata instanceof AlleleBalanceBacked ) { + if ( locusdata instanceof IDBacked ) { + rodDbSNP dbsnp = getDbSNP(tracker); + if ( dbsnp != null ) + ((IDBacked)locusdata).setID(dbsnp.getRS_ID()); + } + if ( locusdata instanceof ArbitraryFieldsBacked) { ArrayList refBalances = new ArrayList(); ArrayList onOffBalances = new ArrayList(); ArrayList weights = new ArrayList(); @@ -384,8 +390,10 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo normalizedOnOffRatio += weights.get(i) * onOffBalances.get(i); } - ((AlleleBalanceBacked)locusdata).setRefRatio(normalizedRefRatio); - ((AlleleBalanceBacked)locusdata).setOnOffRatio(normalizedOnOffRatio); + HashMap fields = new HashMap(); + fields.put("AB", String.format("%.2f", normalizedRefRatio)); + fields.put("OO", String.format("%.2f", normalizedOnOffRatio)); + ((ArbitraryFieldsBacked)locusdata).setFields(fields); } } if ( locusdata instanceof SLODBacked ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index b6f757964..de125120a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.*; @@ -90,7 +91,12 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation if ( locusdata instanceof ConfidenceBacked ) { ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); } - if ( locusdata instanceof AlleleBalanceBacked ) { + if ( locusdata instanceof IDBacked ) { + rodDbSNP dbsnp = getDbSNP(tracker); + if ( dbsnp != null ) + ((IDBacked)locusdata).setID(dbsnp.getRS_ID()); + } + if ( locusdata instanceof ArbitraryFieldsBacked) { DiploidGenotype bestGenotype = DiploidGenotype.values()[bestIndex]; // we care only about het calls @@ -107,8 +113,10 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation totalCount += counts[i]; // set the ratios - ((AlleleBalanceBacked)locusdata).setRefRatio((double)refCount / (double)(refCount + altCount)); - ((AlleleBalanceBacked)locusdata).setOnOffRatio((double)(refCount + altCount) / (double)totalCount); + HashMap fields = new HashMap(); + fields.put("AB", String.format("%.2f", (double)refCount / (double)(refCount + altCount))); + fields.put("OO", String.format("%.2f",(double)(refCount + altCount) / (double)totalCount)); + ((ArbitraryFieldsBacked)locusdata).setFields(fields); } } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 8b7d7ef06..73182923d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.utils.BaseUtils; @@ -195,28 +194,6 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeL return new AlignmentContext(context.getLocation(), newReads, newOffsets); } - /** - * Determine whether we're at a Hapmap site - * - * @param tracker the meta data tracker - * - * @return true if we're at a Hapmap site, false if otherwise - */ - private static boolean isHapmapSite(RefMetaDataTracker tracker) { - return tracker.getTrackData("hapmap", null) != null; - } - - /** - * Determine whether we're at a dbSNP site - * - * @param tracker the meta data tracker - * - * @return true if we're at a dbSNP site, false if otherwise - */ - private static boolean isDbSNPSite(RefMetaDataTracker tracker) { - return rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null)) != null; - } - public Integer reduceInit() { return 0; } public Integer reduce(Pair, GenotypeLocusData> value, Integer sum) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java index da976cee6..d471017ab 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java @@ -182,7 +182,7 @@ public class VariantsToVCF extends RefWalker { return new VCFRecord(ref.getBase(), context.getContig(), (int) context.getPosition(), - (dbsnp == null) ? "." : dbsnp.name, + (dbsnp == null) ? "." : dbsnp.getRS_ID(), alts, snpQual > 99 ? 99 : (int) snpQual, "0", diff --git a/java/src/org/broadinstitute/sting/utils/genotype/AlleleBalanceBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/AlleleBalanceBacked.java deleted file mode 100755 index 404c01857..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/AlleleBalanceBacked.java +++ /dev/null @@ -1,35 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -/** - * @author ebanks - * Interface AlleleBalanceBacked - * - * this interface indicates that the genotype is - * backed up by allele balance information. - */ -public interface AlleleBalanceBacked { - - /** - * - * @return returns the ratio of ref/(ref+alt) bases for hets - */ - public double getRefRatio(); - - /** - * - * @param ratio the ref/(ref+alt) ratio - */ - public void setRefRatio(double ratio); - - /** - * - * @return returns the ratio of (ref+alt)/total bases for hets - */ - public double getOnOffRatio(); - - /** - * - * @param ratio the (ref+alt)/total ratio - */ - public void setOnOffRatio(double ratio); -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/ArbitraryFieldsBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/ArbitraryFieldsBacked.java new file mode 100755 index 000000000..df57a35b2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/ArbitraryFieldsBacked.java @@ -0,0 +1,25 @@ +package org.broadinstitute.sting.utils.genotype; + +import java.util.Map; + +/** + * @author ebanks + * Interface AlleleBalanceBacked + * + * this interface indicates that the genotype is + * backed up by allele balance information. + */ +public interface ArbitraryFieldsBacked { + + /** + * + * @return returns the list of arbitrary fields to emit + */ + public Map getFields(); + + /** + * + * @param fields a list of arbitrary fields to emit + */ + public void setFields(Map fields); +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/IDBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/IDBacked.java new file mode 100755 index 000000000..084925fe2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/IDBacked.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author ebanks + * Interface IDBacked + * + * this interface indicates that the genotype is + * backed up by sample information. + */ +public interface IDBacked { + + /** + * + * @return returns the ID for this genotype + */ + public String getID(); + + /** + * + * @param id the ID for this genotype + */ + public void setID(String id); + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java index 4fb9e02f1..dcf869936 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java @@ -3,6 +3,9 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.genotype.*; +import java.util.HashMap; +import java.util.Map; + /** * @author ebanks *

@@ -10,7 +13,7 @@ import org.broadinstitute.sting.utils.genotype.*; *

* represents the meta data for a genotype object. */ -public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, AlleleFrequencyBacked, AlleleBalanceBacked { +public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, AlleleFrequencyBacked, ArbitraryFieldsBacked { // the discovery lod score private double mConfidence = 0.0; @@ -27,9 +30,11 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked // the ref base private char mRefBase; - // the various allele ratios - private double mRefRatio = 0.0; - private double mOnOffRatio = 0.0; + // the id + private String mID; + + // the various info field values + private Map mInfoFields; /** * create a basic genotype meta data pbject, given the following fields @@ -111,38 +116,24 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked } /** - * @return returns true if the ref/(ref+alt) ratio for this genotype has been set + * @return returns the dbsnp id for this genotype */ - public boolean hasRefRatio() { - return mRefRatio > 0.0; + public String getID() { + return mID; + } + + public void setID(String id) { + mID = id; } /** - * @return returns the ref/(ref+alt) ratio for this genotype + * @return returns te arbitrary info fields */ - public double getRefRatio() { - return mRefRatio; + public Map getFields() { + return mInfoFields; } - public void setRefRatio(double ratio) { - mRefRatio = ratio; - } - - /** - * @return returns true if the (ref+alt)/total ratio for this genotype has been set - */ - public boolean hasOnOffRatio() { - return mOnOffRatio > 0.0; - } - - /** - * @return returns the (ref+alt)/total ratio for this genotype - */ - public double getOnOffRatio() { - return mOnOffRatio; - } - - public void setOnOffRatio(double ratio) { - mOnOffRatio = ratio; + public void setFields(Map fields) { + mInfoFields = new HashMap(fields); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index baeb2b89f..4794fb5f7 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -148,17 +148,24 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { throw new IllegalArgumentException("Genotype array passed to VCFGenotypeWriterAdapter contained Genotypes not in the VCF header"); } + // info fields Map infoFields = getInfoFields((VCFGenotypeLocusData)locusdata, params); infoFields.put("DP", String.format("%d", totalReadDepth)); + // q-score double qual = (locusdata == null) ? 0 : ((VCFGenotypeLocusData)locusdata).getConfidence(); // min Q-score is zero qual = Math.max(qual, 0); + // dbsnp id + String dbSnpID = null; + if ( locusdata != null ) + dbSnpID = ((VCFGenotypeLocusData)locusdata).getID(); + VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(), params.getContig(), params.getPosition(), - ".", + (dbSnpID == null ? "." : dbSnpID), params.getAlternateBases(), qual, "0", @@ -182,10 +189,10 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { if ( locusdata != null ) { infoFields.put("SB", String.format("%.2f", locusdata.getSLOD())); infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency())); - if ( locusdata.hasRefRatio() ) - infoFields.put("AB", String.format("%.2f", locusdata.getRefRatio())); - if ( locusdata.hasOnOffRatio() ) - infoFields.put("OO", String.format("%.2f", locusdata.getOnOffRatio())); + Map otherFields = locusdata.getFields(); + if ( otherFields != null ) { + infoFields.putAll(otherFields); + } } infoFields.put("NS", String.valueOf(params.getGenotypesRecords().size())); return infoFields;