diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/AlleleFrequencyWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/AlleleFrequencyWalker.java index ba516585f..d6e98ae4b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/AlleleFrequencyWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/AlleleFrequencyWalker.java @@ -1,22 +1,21 @@ package org.broadinstitute.sting.playground.gatk.walkers; //import org.broadinstitute.sting.gatk.iterators.LocusIterator; +import net.sf.samtools.SAMRecord; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate; import org.broadinstitute.sting.playground.utils.AlleleMetrics; -import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.*; -import org.apache.log4j.Logger; -import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.cmdLine.Argument; -import java.util.List; -import java.util.Arrays; -import java.util.Random; import java.io.PrintStream; -import java.io.DataInputStream; +import java.util.Arrays; +import java.util.List; +import java.util.Random; public class AlleleFrequencyWalker extends LocusWalker// implements AllelicVariant { @@ -140,7 +139,7 @@ public class AlleleFrequencyWalker extends LocusWalker result is %s", alleleFreq)); - if (LOG_METRICS) metrics.nextPosition(alleleFreq, tracker); + //if (LOG_METRICS) metrics.nextPosition(alleleFreq, tracker); return alleleFreq; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java index 92f0b4685..18e4ce1b6 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java @@ -1,19 +1,21 @@ package org.broadinstitute.sting.playground.gatk.walkers; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodGFF; -import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.ListUtils; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.GenotypeCall; -import java.util.List; -import java.util.ArrayList; -import java.io.PrintStream; -import java.io.FileNotFoundException; import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. @@ -49,7 +51,7 @@ public class CoverageEvalWalker extends LocusWalker, String> { System.exit(-1); } - String header = GELI_OUTPUT_FORMAT ? AlleleFrequencyEstimate.geliHeaderString() : AlleleFrequencyEstimate.asTabularStringHeader(); + String header = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT"; variantsOut.println("DownsampledCoverage\tAvailableCoverage\tHapmapChipGenotype\tGenotypeCallType\t"+header.substring(1)); } @@ -86,10 +88,11 @@ public class CoverageEvalWalker extends LocusWalker, String> { List sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets); LocusContext subContext = new LocusContext(context.getLocation(), sub_reads, sub_offsets); - AlleleFrequencyEstimate alleleFreq = SSG.map(tracker, ref, subContext); + GenotypeCall call = SSG.map(tracker, ref, subContext); - if (alleleFreq != null) { - GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+alleleFreq.callType()+"\t"+alleleFreq.asGeliString()); + String callType = (call.isVariation()) ? ((call.getBestVrsRef().first.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; + if (call != null) { + GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call)); } } } @@ -110,6 +113,41 @@ public class CoverageEvalWalker extends LocusWalker, String> { return ""; } + + // a method to support getting the geli string, since the AlleleFrequencyEstimate is going away + public String toGeliString (GenotypeCall locus) { + if (locus.getPosteriors().size() != 10) throw new IllegalArgumentException("Geli text only supports SNP calls, with a diploid organism (i.e. posterior array size of 10)"); + + + // this is to perserve the format string that we used to use + double[] likelihoods = new double[10]; + int index = 0; + List lt = locus.getLexigraphicallySortedGenotypes(); + for (Genotype G: lt) { + likelihoods[index] = G.getLikelihood(); + index++; + } + + return String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", + locus.getLocation().getContig(), + locus.getLocation().getStart(), + locus.getReferencebase(), + locus.getReadDepth(), + -1, + locus.getGenotypes().get(0).getBases(), + locus.getBestVrsRef().second.getScore(), + locus.getBestVrsNext().second.getScore(), + likelihoods[0], + likelihoods[1], + likelihoods[2], + likelihoods[3], + likelihoods[4], + likelihoods[5], + likelihoods[6], + likelihoods[7], + likelihoods[8], + likelihoods[9]); + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/GenotypeLikelihoodsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/GenotypeLikelihoodsWalker.java deleted file mode 100644 index ec28cddb7..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/GenotypeLikelihoodsWalker.java +++ /dev/null @@ -1,124 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMSequenceRecord; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.ReadFilters; -import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods; -import org.broadinstitute.sting.utils.ReadBackedPileup; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; - -import java.io.File; -import java.util.List; - - -/** - * - * @author aaron - * - * Class LikelihoodWalker - * - * a simple walker to calculate the genotype likelihoods - */ -@ReadFilters(ZeroMappingQualityReadFilter.class) -public class GenotypeLikelihoodsWalker extends LocusWalker { - @Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE; - @Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false) public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF; - - - @Override - public LikelihoodWrapper map(RefMetaDataTracker tracker, char ref, LocusContext context) { - ReadBackedPileup pileup = new ReadBackedPileup(ref, context); - String bases = pileup.getBases(); - List reads = context.getReads(); - List offsets = context.getOffsets(); - double rmsSum = 0.0; - // Handle single-base polymorphisms. - GenotypeLikelihoods G = new GenotypeLikelihoods(); - for (int i = 0; i < reads.size(); i++) { - SAMRecord read = reads.get(i); - int offset = offsets.get(i); - rmsSum += (read.getMappingQuality() * read.getMappingQuality()); - G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]); - } - - // our return - LikelihoodWrapper wrap = new LikelihoodWrapper(); - - return wrapLikelihoods(ref, context, reads, rmsSum, G, wrap); - } - - /** - * wrap the likelihood values in with the other data we'll need - * @param ref the ref base - * @param context the locus context - * @param reads the reads - * @param rmsSum the rms square total (not the actual rms yet) - * @param g the genotypeLikelihoods - * @param wrap the object to place the values into - * @return a likelihood wrapper - */ - private LikelihoodWrapper wrapLikelihoods(char ref, LocusContext context, List reads, double rmsSum, GenotypeLikelihoods g, LikelihoodWrapper wrap) { - wrap.obj = new LikelihoodObject(); - wrap.obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.LOG); - for (int x = 0; x < GenotypeLikelihoods.genotypes.length; x++) { - wrap.obj.setLikelihood(LikelihoodObject.GENOTYPE.valueOf(GenotypeLikelihoods.genotypes[x]), g.likelihoods[x]*10.0); - } - wrap.obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); - wrap.loc = GenomeLocParser.getContigInfo(context.getContig()); - wrap.readDepth = reads.size(); - float rms = (float)(Math.sqrt(rmsSum/reads.size())); - wrap.rms = (rms > 255) ? 255 : rms; - wrap.ref = ref; - wrap.position = context.getLocation().getStart(); - return wrap; - } - - /** - * Provide an initial value for reduce computations. - * - * @return Initial value of reduce. - */ - @Override - public GenotypeWriter reduceInit() { - return GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(),VARIANTS_FILE); - } - - /** - * Reduces a single map with the accumulator provided as the ReduceType. - * - * @param value result of the map. - * @param sum accumulator for the reduce. - * - * @return accumulator with result of the map taken into account. - */ - @Override - public GenotypeWriter reduce(LikelihoodWrapper value, GenotypeWriter sum) { - - sum.addGenotypeCall(value.loc,(int)value.position,value.rms,value.ref,value.readDepth,value.obj); - return sum; - } - - /** Close the variant file. */ - public void onTraversalDone(GenotypeWriter result) { - result.close(); - } -} - - -class LikelihoodWrapper { - public LikelihoodObject obj; - public SAMSequenceRecord loc; - public long position; - public float rms; - public char ref; - public int readDepth; -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java index 1b06e1379..1582bd9f3 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java @@ -2,32 +2,27 @@ package org.broadinstitute.sting.playground.gatk.walkers; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.ReadFilters; -import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate; import org.broadinstitute.sting.playground.utils.AlleleMetrics; import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods; import org.broadinstitute.sting.playground.utils.IndelLikelihood; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.BasicPileup; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; -import org.broadinstitute.sting.utils.genotype.glf.GLFRecord; +import org.broadinstitute.sting.utils.genotype.*; import java.io.File; -import java.io.FileNotFoundException; -import java.io.PrintStream; import java.util.List; -import java.util.ArrayList; @ReadFilters(ZeroMappingQualityReadFilter.class) -public class SingleSampleGenotyper extends LocusWalker { +public class SingleSampleGenotyper extends LocusWalker { // Control output settings @Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE; @Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr"); @@ -35,11 +30,9 @@ public class SingleSampleGenotyper extends LocusWalker reads = context.getReads(); - List offsets = context.getOffsets(); - - // Handle indels, but don't do anything with the result yet. - IndelLikelihood I = (CALL_INDELS) ? callIndel(context, reads, offsets) : null; - //IndelLikelihood I = (CALL_INDELS) ? callIndel(context, context.getReads(), context.getOffsets()) : null; - // Handle single-base polymorphisms. - GenotypeLikelihoods G = callGenotype(tracker, ref, pileup, reads, offsets); - //GenotypeLikelihoods G = callGenotype(tracker, ref, context); + GenotypeLikelihoods G = callGenotype(tracker); + GenotypeLocus geno = G.callGenotypes(tracker, ref, pileup); - return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods, sample_name); + return geno; } /** * Calls the underlying, single locus genotype of the sample * * @param tracker the meta data tracker - * @param ref the reference base - * @param pileup the pileup object for the given locus - * @param reads the reads that overlap this locus - * @param offsets the offsets per read that identify the base at this locus * * @return the likelihoods per genotype */ - private GenotypeLikelihoods callGenotype(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup, List reads, List offsets) { + private GenotypeLikelihoods callGenotype(RefMetaDataTracker tracker) { GenotypeLikelihoods G = null; if (isHapmapSite(tracker)) { - G = new GenotypeLikelihoods(THREE_BASE_ERRORS, phapmap[0], phapmap[1], phapmap[2], p2ndon, p2ndoff); + G = new GenotypeLikelihoods(THREE_BASE_ERRORS, phapmap[0], phapmap[1], phapmap[2], p2ndon, p2ndoff, keepQ0Bases); } else if (isDbSNPSite(tracker)) { - G = new GenotypeLikelihoods(THREE_BASE_ERRORS, pdbsnp[0], pdbsnp[1], pdbsnp[2], p2ndon, p2ndoff); + G = new GenotypeLikelihoods(THREE_BASE_ERRORS, pdbsnp[0], pdbsnp[1], pdbsnp[2], p2ndon, p2ndoff, keepQ0Bases); } else { - G = new GenotypeLikelihoods(THREE_BASE_ERRORS, plocus[0], plocus[1], plocus[2], p2ndon, p2ndoff); + G = new GenotypeLikelihoods(THREE_BASE_ERRORS, plocus[0], plocus[1], plocus[2], p2ndon, p2ndoff, keepQ0Bases); } - - G.filterQ0Bases(! keepQ0Bases); // Set the filtering / keeping flag - - for (int i = 0; i < reads.size(); i++) { - SAMRecord read = reads.get(i); - int offset = offsets.get(i); - - char base = read.getReadString().charAt(offset); - byte qual = read.getBaseQualities()[offset]; - int nBasesAdded = G.add(ref, base, qual); - if ( nBasesAdded == 0 ) { - nFilteredQ0Bases++; - //System.out.printf("Filtering Q0 base from %s at %s %d: %c %c %d%n", - // read.getReadName(), GenomeLocParser.createGenomeLoc(read), offset, ref, base, qual); - } - } - - G.ApplyPrior(ref, this.alt_allele, this.allele_frequency_prior); - - //if (!IGNORE_SECONDARY_BASES && pileup.getBases().length() < 750) { - // G.applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup()); - //} - - G.applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup()); - return G; } @@ -312,69 +253,38 @@ public class SingleSampleGenotyper extends LocusWalker= 5 variant, output it to disk. * - * @param alleleFreq an AlleleFrequencyEstimage object for the variant. - * @param sum accumulator for the reduce. + * @param call an GenotypeCall object for the variant. + * @param sum accumulator for the reduce. * * @return an empty string */ - public String reduce(AlleleFrequencyEstimate alleleFreq, String sum) { - //System.out.printf("AlleleFreqEstimate %s %f %f %f%n", alleleFreq,alleleFreq.lodVsNextBest, alleleFreq.lodVsRef, LOD_THRESHOLD ); - if (alleleFreq != null && (GENOTYPE ? alleleFreq.lodVsNextBest : alleleFreq.lodVsRef) >= LOD_THRESHOLD) { - if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GELI) { - variantsOut.println(alleleFreq.asGeliString()); - } else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.TABULAR) { - variantsOut.println(alleleFreq.asTabularString()); - } else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) { - SAMSequenceRecord rec = GenomeLocParser.getContigInfo(alleleFreq.location.getContig()); - - double[] likelihoods = new double[10]; - for (int x = 0; x < alleleFreq.posteriors.length; x++) { - likelihoods[x] = GLFRecord.LIKELIHOOD_SCALE_FACTOR * alleleFreq.genotypeLikelihoods.likelihoods[x]; - } - - LikelihoodObject obj = new LikelihoodObject(likelihoods, LikelihoodObject.LIKELIHOOD_TYPE.LOG); - - obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); - this.mGenotypeWriter.addGenotypeCall(rec,(int)alleleFreq.location.getStart(),0.0f,alleleFreq.ref,alleleFreq.depth,obj); - } + public GenotypeWriter reduce(GenotypeCall call, GenotypeWriter sum) { + if (call != null && call.isVariation()) { + if ((GENOTYPE && call.getBestVrsNext().second.getScore() > LOD_THRESHOLD) || + (call.getBestVrsRef().second.getScore() > LOD_THRESHOLD)) + sum.addGenotypeCall(call); } - - return ""; + return sum; } /** Close the variant file. */ - public void onTraversalDone(String sum) { + public void onTraversalDone(GenotypeWriter sum) { logger.info(String.format("SingleSampleGenotyper filtered %d Q0 bases", nFilteredQ0Bases)); - if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) { - mGenotypeWriter.close(); - } else { - this.variantsOut.close(); - } + sum.close(); } } + + diff --git a/java/src/org/broadinstitute/sting/playground/utils/AlleleMetrics.java b/java/src/org/broadinstitute/sting/playground/utils/AlleleMetrics.java index dcdb1bcf5..b769bda5c 100755 --- a/java/src/org/broadinstitute/sting/playground/utils/AlleleMetrics.java +++ b/java/src/org/broadinstitute/sting/playground/utils/AlleleMetrics.java @@ -1,15 +1,17 @@ package org.broadinstitute.sting.playground.utils; -import org.broadinstitute.sting.gatk.refdata.rodGFF; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker; +import org.broadinstitute.sting.gatk.refdata.rodGFF; +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.genotype.ConfidenceScore; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.GenotypeCall; -import java.util.List; -import java.io.PrintStream; import java.io.File; import java.io.FileNotFoundException; +import java.io.PrintStream; public class AlleleMetrics { private double LOD_cutoff = 5; @@ -54,7 +56,7 @@ public class AlleleMetrics { this.LOD_cutoff = lodThresold; } - public void nextPosition(AlleleFrequencyEstimate alleleFreq, RefMetaDataTracker tracker) { + public void nextPosition(GenotypeCall call, RefMetaDataTracker tracker) { num_loci_total += 1; boolean is_dbSNP_SNP = false; @@ -78,10 +80,10 @@ public class AlleleMetrics { } } } + Pair result = call.getBestVrsRef(); + if (Math.abs(call.getBestVrsNext().second.getScore()) >= LOD_cutoff) { num_loci_confident += 1; } - if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) { num_loci_confident += 1; } - - if (alleleFreq.qstar > 0.0 && alleleFreq.lodVsRef >= LOD_cutoff) + if (call.isVariation() && result.second.getScore() >= LOD_cutoff) { // Confident variant. @@ -99,10 +101,12 @@ public class AlleleMetrics { String hapmap_genotype = hapmap_chip_genotype.getFeature(); long refs=0, alts=0; double hapmap_q; - + String str = call.getBestVrsRef().first.getBases(); + char alt = str.charAt(0); + if (str.charAt(0) == call.getReferencebase()) alt = str.charAt(1); for (char c : hapmap_genotype.toCharArray()) { - if (c == alleleFreq.ref) { refs++; } - if (c == alleleFreq.alt) { alts++; } + if (c == call.getReferencebase()) { refs++; } + if (c == alt) { alts++; } } if (refs+alts > 0) { @@ -117,7 +121,7 @@ public class AlleleMetrics { //out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt); //System.out.printf("DBG %f %s\n", LOD_cutoff, alleleFreq.asTabularString()); - if (alleleFreq.lodVsNextBest >= LOD_cutoff) { + if (call.getBestVrsNext().second.getScore() >= LOD_cutoff) { /* System.out.printf("DBG %f %f %f %f\n", @@ -129,22 +133,22 @@ public class AlleleMetrics { // Calculate genotyping performance - did we get the correct genotype of the N+1 choices? //if (hapmap_q != -1 && hapmap_q == alleleFreq.qstar) { - if (Math.abs(hapmap_q - -1.0) > dbl_cmp_precision && Math.abs(hapmap_q - alleleFreq.qstar) <= dbl_cmp_precision) { + /*if (Math.abs(hapmap_q - -1.0) > dbl_cmp_precision && Math.abs(hapmap_q - alleleFreq.qstar) <= dbl_cmp_precision) { hapmap_genotype_correct++; }else{ hapmap_genotype_incorrect++; //System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context)); //out.printf(" INCORRECT GENOTYPE"); //AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context)); - } + }*/ } - if (alleleFreq.lodVsRef >= LOD_cutoff || -1 * alleleFreq.lodVsRef >= LOD_cutoff) { + if (result.second.getScore() >= LOD_cutoff || -1 * result.second.getScore() >= LOD_cutoff) { // Now calculate ref / var performance - did we correctly classify the site as // reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here boolean hapmap_var = hapmap_q != 0.0; - boolean called_var = alleleFreq.qstar != 0.0; + boolean called_var = call.isVariation(); //if (hapmap_q != -1 && hapmap_var != called_var) { if (Math.abs(hapmap_q - -1.0) > dbl_cmp_precision && hapmap_var != called_var) { hapmap_refvar_incorrect++; diff --git a/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java index a53c837e3..52d158958 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java @@ -1,14 +1,15 @@ package org.broadinstitute.sting.playground.utils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.genotype.*; import static java.lang.Math.log10; import static java.lang.Math.pow; import java.util.HashMap; -public class GenotypeLikelihoods { +public class GenotypeLikelihoods implements GenotypeGenerator { // precalculate these for performance (pow/log10 is expensive!) /** @@ -20,7 +21,7 @@ public class GenotypeLikelihoods { private static final double[] oneMinusData = new double[Byte.MAX_VALUE]; private static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE]; private static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE]; - + private final boolean keepQ0Bases; private static final double log10Of1_3 = log10(1.0 / 3); private static final double log10Of2_3 = log10(2.0 / 3); @@ -62,7 +63,7 @@ public class GenotypeLikelihoods { genotypes[8] = "GT"; genotypes[9] = "TT"; } - public int coverage; + public int coverage; // The genotype priors; private double priorHomRef; @@ -80,18 +81,19 @@ public class GenotypeLikelihoods { public GenotypeLikelihoods() { double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000}; double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505}; - + keepQ0Bases = true; initialize(false, 1.0 - 1e-3, 1e-3, 1e-5, p2ndon, p2ndoff); } public GenotypeLikelihoods(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar) { double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000}; double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505}; - + keepQ0Bases = true; initialize(threeBaseErrors, priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff); } - public GenotypeLikelihoods(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) { + public GenotypeLikelihoods(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff, boolean keepQ0Bases) { + this.keepQ0Bases = keepQ0Bases; initialize(threeBaseErrors, priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff); } @@ -276,62 +278,35 @@ public class GenotypeLikelihoods { return s; } - public void ApplyPrior(char ref, double[] allele_likelihoods) - { - int k = 0; - for (int i = 0; i < 4; i++) - { - for (int j = i; j < 4; j++) - { - if (i == j) - { - this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]); - } - else - { - this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2); - } - k++; - } - } - this.sort(); - } + public void ApplyPrior(char ref, double[] allele_likelihoods) { + int k = 0; + for (int i = 0; i < 4; i++) { + for (int j = i; j < 4; j++) { + if (i == j) { + this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]); + } else { + this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2); + } + k++; + } + } + this.sort(); + } - public void ApplyPrior(char ref, char alt, double p_alt) { + public void ApplyPrior(char ref) { for (int i = 0; i < genotypes.length; i++) { - if ((p_alt == -1) || (p_alt <= 1e-6)) { - if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { - // hom-ref - likelihoods[i] += Math.log10(priorHomRef); - } else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) { - // hom-nonref - likelihoods[i] += Math.log10(priorHomVar); - } else { - // het - likelihoods[i] += Math.log10(priorHet); - } - if (Double.isInfinite(likelihoods[i])) { - likelihoods[i] = -1000; - } + if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { + // hom-ref + likelihoods[i] += Math.log10(priorHomRef); + } else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) { + // hom-nonref + likelihoods[i] += Math.log10(priorHomVar); } else { - if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { - // hom-ref - likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt); - } else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt)) { - // hom-nonref - likelihoods[i] += 2.0 * Math.log10(p_alt); - } else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) || - ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt))) { - // het - likelihoods[i] += Math.log10((1.0 - p_alt) * p_alt * 2.0); - } else { - // something else (noise!) - likelihoods[i] += Math.log10(1e-5); - } - - if (Double.isInfinite(likelihoods[i])) { - likelihoods[i] = -1000; - } + // het + likelihoods[i] += Math.log10(priorHet); + } + if (Double.isInfinite(likelihoods[i])) { + likelihoods[i] = -1000; } } this.sort(); @@ -410,51 +385,66 @@ public class GenotypeLikelihoods { return this.sorted_likelihoods[0]; } - public double RefPosterior(char ref) - { - this.LodVsRef(ref); - return this.ref_likelihood; - } - - public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors, String sample_name) { - this.sort(); - double qhat = Double.NaN; - double qstar = Double.NaN; - char alt = 'N'; - - if ((sorted_genotypes[0].charAt(0) == ref) && (sorted_genotypes[0].charAt(1) == ref)) { - // hom-ref - qhat = 0.0; - qstar = 0.0; - alt = 'N'; - } else if ((sorted_genotypes[0].charAt(0) != ref) && (sorted_genotypes[0].charAt(1) != ref)) { - // hom-nonref - qhat = 1.0; - qstar = 1.0; - alt = sorted_genotypes[0].charAt(0); - } else { - // het - qhat = 0.5; - qstar = 0.5; - - if (sorted_genotypes[0].charAt(0) != ref) { - alt = sorted_genotypes[0].charAt(0); - } - if (sorted_genotypes[0].charAt(1) != ref) { - alt = sorted_genotypes[0].charAt(1); - } - } - - this.LodVsRef(ref); //HACK - //System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood); - - AlleleFrequencyEstimate AFE = new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods, sample_name); - AFE.genotypeLikelihoods = this; - return AFE; + public double RefPosterior(char ref) { + this.LodVsRef(ref); + return this.ref_likelihood; } - private IndelLikelihood indel_likelihood; - public void addIndelLikelihood(IndelLikelihood indel_likelihood) { this.indel_likelihood = indel_likelihood; } - public IndelLikelihood getIndelLikelihood() { return this.indel_likelihood; } + private IndelLikelihood indel_likelihood; + public void addIndelLikelihood(IndelLikelihood indel_likelihood) { + this.indel_likelihood = indel_likelihood; + } + + public IndelLikelihood getIndelLikelihood() { + return this.indel_likelihood; + } + + /** + * given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs + * + * @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information + * @param ref the reference base + * @param pileup a pileup of the reads, containing the reads and their offsets + * + * @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values + */ + @Override + public GenotypeLocus callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) { + //filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag + + + // for calculating the rms of the mapping qualities + double squared = 0.0; + for (int i = 0; i < pileup.getReads().size(); i++) { + SAMRecord read = pileup.getReads().get(i); + squared += read.getMappingQuality() * read.getMappingQuality(); + int offset = pileup.getOffsets().get(i); + char base = read.getReadString().charAt(offset); + byte qual = read.getBaseQualities()[offset]; + add(ref, base, qual); + } + // save off the likelihoods + if (likelihoods == null || likelihoods.length == 0) return null; + + double lklihoods[] = new double[likelihoods.length]; + + System.arraycopy(likelihoods, 0, lklihoods, 0, likelihoods.length); + + + ApplyPrior(ref); + + applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup()); + + // lets setup the locus + GenotypeLocus locus = new GenotypeLocusImpl(pileup.getLocation(), pileup.getReads().size(),Math.sqrt(squared/pileup.getReads().size())); + for (int x = 0; x < this.likelihoods.length; x++) { + try { + locus.addGenotype(new Genotype(this.genotypes[x],lklihoods[x],this.likelihoods[x])); + } catch (InvalidGenotypeException e) { + throw new StingException("Invalid Genotype value",e); + } + } + return locus; + } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/ConfidenceScore.java b/java/src/org/broadinstitute/sting/utils/genotype/ConfidenceScore.java new file mode 100644 index 000000000..33f1ee95b --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/ConfidenceScore.java @@ -0,0 +1,65 @@ +package org.broadinstitute.sting.utils.genotype; + + +/** + * @author aaron + *

+ * Class ConfidenceScore + *

+ * this class represents the confidence in a genotype, and the method we used to obtain it + */ +public class ConfidenceScore implements Comparable { + + private static final Double EPSILON = 1.0e-15; + + public enum SCORE_METHOD { + BEST_NEXT, BEST_REF, OTHER; + } + + private Double mScore; + private SCORE_METHOD mMethod; + + public ConfidenceScore(double score, SCORE_METHOD method) { + this.mScore = score; + this.mMethod = method; + } + + /** + * generate a confidence score, given the two likelihoods, and the method used + * + * @param likelihoodOne the first likelihood + * @param likelihoodTwo the second likelihood + * @param method the method used to determine the likelihood + */ + public ConfidenceScore(double likelihoodOne, double likelihoodTwo, SCORE_METHOD method) { + this.mScore = likelihoodOne / likelihoodTwo; + this.mMethod = method; + } + + /** + * compare this ConfidenceScore to another, throwing an exception if they're not the same scoring method + * @param o the other confidence score if + * @return 0 if equal + */ + @Override + public int compareTo(ConfidenceScore o) { + if (o.mMethod != this.mMethod) { + throw new UnsupportedOperationException("Attemped to compare Confidence scores with different methods"); + } + double diff = mScore - o.mScore; + if (Math.abs(diff) < (EPSILON * Math.abs(mScore))) + return 0; + else if (diff < 0) + return -1; + else + return 1; + } + + public Double getScore() { + return mScore; + } + + public SCORE_METHOD getMethod() { + return mMethod; + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java new file mode 100644 index 000000000..4bc7ebe73 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java @@ -0,0 +1,113 @@ +package org.broadinstitute.sting.utils.genotype; + + +/** + * @author aaron + *

+ * Class GenotypeLikelihood + *

+ * This class encompasses all the information that is associated with a genotype + * and it's likelihood, mainly: + *

+ * Likelihood value + */ +public class Genotype { + private double mLikelihood = 0.0; + private double mPosteriorProb = 0.0; + private String mBases = ""; + private int mPloidy = 2; // assume diploid + + /** + * construct a genotypeLikelihood, given the bases, the posterior, and the likelihood + * + * @param bases the bases that make up this genotype + * @param posterior the posterior probability of this genotype + * @param likelihood the likelihood of this genotype + * @param ploidy the ploidy of this genotype + */ + public Genotype(String bases, double posterior, double likelihood, int ploidy) { + this.mPloidy = ploidy; + if (bases.length() != ploidy) { + throw new IllegalArgumentException("The number of bases should match the ploidy"); + } + this.mLikelihood = likelihood; + this.mBases = bases; + this.mPosteriorProb = posterior; + } + + /** + * construct a genotypeLikelihood, given the bases, the posterior, and the likelihood + * + * @param bases the bases that make up this genotype + * @param posterior the posterior probability of this genotype + * @param likelihood the likelihood of this genotype + */ + public Genotype(String bases, double posterior, double likelihood) { + if (bases.length() != mPloidy) { + throw new IllegalArgumentException("The number of bases should match the ploidy"); + } + this.mLikelihood = likelihood; + this.mBases = bases; + this.mPosteriorProb = posterior; + } + + /** + * get the likelihood value + * + * @return a double, representing the likelihood + */ + public double getLikelihood() { + return mLikelihood; + } + + /** + * get the posterior value + * + * @return a double, representing the posterior + */ + public double getPosteriorProb() { + return mPosteriorProb; + } + + /** + * get the bases that represent this + * + * @return + */ + public String getBases() { + return mBases; + } + + public int getPloidy() { + return mPloidy; + } + + /** + * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) + * + * @return + */ + public boolean isHom() { + if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base"); + char first = mBases.charAt(0); + for (char c: mBases.toCharArray()) { + if (c != first) return false; + } + return true; + } + + /** + * Returns true if observed alleles differ (regardless of whether they are ref or alt) + * + * @return + */ + public boolean isHet() { + if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base"); + char first = mBases.charAt(0); + for (char c: mBases.toCharArray()) { + if (c != first) return true; + } + return false; + } + +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java new file mode 100644 index 000000000..c98af4651 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java @@ -0,0 +1,52 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.Pair; + +/** + * @author aaron + *

+ * Interface GenotypeCall + *

+ * Genotype call interface, for indicating that a genotype is + * also a genotype call. + */ +public interface GenotypeCall extends GenotypeLocus{ + /** + * get the confidence + * + * @return a ConfidenceScore representing the LOD score that this genotype was called with + */ + public ConfidenceScore getConfidence(); + + /** + * gets the reference base + * + * @return the reference base we represent + */ + public char getReferencebase(); + + /** + * get the best vrs the next best genotype LOD score + * @return the genotype, and a LOD for best - next + */ + public Pair getBestVrsNext(); + + /** + * get the best vrs the reference allele. + * @return the genotype, and a LOD for best - ref. The best may be ref, unless you've checked + * with is variation + */ + public Pair getBestVrsRef(); + + /** + * check to see if this call is a variant, i.e. not homozygous reference + * @return true if it's not hom ref, false otherwise + */ + public boolean isVariation(); + + /** + * return genotype locus, with our data + */ + public GenotypeLocus toGenotypeLocus(); +} + diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCallImpl.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCallImpl.java new file mode 100644 index 000000000..4db9bcc3b --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCallImpl.java @@ -0,0 +1,225 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.util.List; + + +/** + * @author aaron + *

+ * Class GenotypeCallImpl + *

+ * A descriptions should go here. Blame aaron if it's missing. + */ +public class GenotypeCallImpl implements GenotypeCall { + + // our stored genotype locus + private final GenotypeLocus mLocus; + private final char mRefBase; + private final ConfidenceScore mScore; + + /** + * generate a GenotypeCall object with the specified locus info and reference base + * + * @param mLocus the locus + * @param mRefBase the reference base to use + */ + public GenotypeCallImpl(GenotypeLocus mLocus, char mRefBase, ConfidenceScore mScore) { + if (mLocus.getGenotypes().size() < 1) throw new StingException("Genotype Locus is empty"); + this.mLocus = mLocus; + this.mRefBase = String.valueOf(mRefBase).toUpperCase().charAt(0); + this.mScore = mScore; + } + + /** + * get the confidence + * + * @return a ConfidenceScore representing the LOD score that this genotype was called with + */ + @Override + public ConfidenceScore getConfidence() { + return mScore; + } + + /** + * gets the reference base + * + * @return the reference base we represent + */ + @Override + public char getReferencebase() { + return mRefBase; + } + + /** + * get the best vrs the next best genotype LOD score + * + * @return the genotype, and a LOD for best - next + */ + @Override + public Pair getBestVrsNext() { + List genos = this.mLocus.getGenotypes(); + if (mLocus.getGenotypes().size() < 2) throw new StingException("Genotype Locus does not contain two genotypes"); + return new Pair(genos.get(0), + new ConfidenceScore(Math.abs(genos.get(0).getLikelihood() - genos.get(1).getLikelihood()), ConfidenceScore.SCORE_METHOD.BEST_NEXT)); + } + + /** + * get the best vrs the reference allele. + * + * @return the genotype, and a LOD for best - ref. The best may be ref, unless you've checked + * with is variation + */ + @Override + public Pair getBestVrsRef() { + List genos = this.mLocus.getGenotypes(); + + // find the reference allele + String ref = Utils.dupString(this.mRefBase, mLocus.getPloidy()).toUpperCase(); + Genotype refGenotype = findRefGenotype(ref, genos); + if (mLocus.getGenotypes().size() < 2) throw new StingException("Genotype Locus does not contain two genotypes"); + return new Pair(genos.get(0), + new ConfidenceScore(Math.abs(genos.get(0).getLikelihood() - refGenotype.getLikelihood()), ConfidenceScore.SCORE_METHOD.BEST_NEXT)); + } + + /** + * get the reference genotype object + * + * @param ref the reference as a ploidy count homozygous string + * @param genos the genotype list + * + * @return a genotype for the + */ + private static Genotype findRefGenotype(String ref, List genos) { + Genotype refGenotype = null; + for (Genotype g : genos) { + if (g.getBases().equals(ref)) refGenotype = g; + } + if (refGenotype == null) { + for (Genotype g : genos) { + System.err.println(g.getBases()); + } + throw new StingException("Unable to find the reference genotype + " + ref + " size of genotype list = " + genos.size()); + } + return refGenotype; + } + + /** + * check to see if this call is a variant, i.e. not homozygous reference + * + * @return true if it's not hom ref, false otherwise + */ + @Override + public boolean isVariation() { + List genos = this.mLocus.getGenotypes(); + String ref = Utils.dupString(this.mRefBase, mLocus.getPloidy()).toUpperCase(); + return !(genos.get(0).getBases().equals(ref)); + } + + /** return genotype locus, with our data */ + @Override + public GenotypeLocus toGenotypeLocus() { + return mLocus; + } + + /** + * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base + * is located right after the specified location + * + * @return position on the genome wrapped in GenomeLoc object + */ + @Override + public GenomeLoc getLocation() { + return mLocus.getLocation(); + } + + /** + * get the ploidy at this locus + * + * @return an integer representing the genotype ploidy at this location + */ + @Override + public int getPloidy() { + return mLocus.getPloidy(); + } + + /** + * get the genotypes, sorted in asscending order by their likelihoods (the best + * to the worst likelihoods) + * + * @return a list of the likelihoods + */ + @Override + public List getGenotypes() { + return mLocus.getGenotypes(); + } + + /** + * get the genotypes and their posteriors + * + * @return a list of the poseriors + */ + @Override + public List getPosteriors() { + return mLocus.getPosteriors(); + } + + /** + * get the genotypes sorted lexigraphically + * + * @return a list of the genotypes sorted lexi + */ + @Override + public List getLexigraphicallySortedGenotypes() { + return mLocus.getLexigraphicallySortedGenotypes(); + } + + /** + * get the read depth at this position + * + * @return the read depth, -1 if it is unknown + */ + @Override + public int getReadDepth() { + return mLocus.getReadDepth(); + } + + /** + * add a genotype to the collection + * + * @param genotype + * + * @throws InvalidGenotypeException + */ + @Override + public void addGenotype(Genotype genotype) throws InvalidGenotypeException { + mLocus.addGenotype(genotype); + } + + /** + * get the root mean square (RMS) of the mapping qualities + * + * @return the RMS, or a value < 0 if it's not available + */ + @Override + public double getRMSMappingQuals() { + return mLocus.getRMSMappingQuals(); + } + + /** + * create a variant, given the reference, and a lod score + * + * @param refBase the reference base + * @param score the threshold to use to determine if it's a variant or not + * + * @return a variant object, or null if no genotypes meet the criteria + */ + @Override + public Variant toGenotypeCall(char refBase, ConfidenceScore score) { + return null; //To change body of implemented methods use File | Settings | File Templates. + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeGenerator.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeGenerator.java new file mode 100644 index 000000000..3850f0634 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeGenerator.java @@ -0,0 +1,26 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.ReadBackedPileup; + + +/** + * + * @author aaron + * + * Class GenotypeFactory + * + * Create genotypes, given certain pileup information + */ +public interface GenotypeGenerator { + + /** + * given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs + * + * @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information + * @param ref the reference base + * @param pileup a pileup of the reads, containing the reads and their offsets + * @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values + */ + public GenotypeLocus callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup); +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocus.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocus.java new file mode 100644 index 000000000..082461287 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocus.java @@ -0,0 +1,132 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.util.List; +import java.util.Comparator; + +/** + * @author aaron + *

+ * Interface Genotype + *

+ * This interface represents the collection of genotypes at a specific locus + */ +public interface GenotypeLocus { + + /** + * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base + * is located right after the specified location + * + * @return position on the genome wrapped in GenomeLoc object + */ + public GenomeLoc getLocation(); + + /** + * get the ploidy at this locus + * + * @return an integer representing the genotype ploidy at this location + */ + public int getPloidy(); + + /** + * get the genotypes, sorted in asscending order by their likelihoods (the best + * to the worst likelihoods) + * + * @return a list of the genotypes, sorted by likelihoods + */ + public List getGenotypes(); + + /** + * get the genotypes and their posteriors + * + * @return a list of the genotypes, sorted by poseriors + */ + public List getPosteriors(); + + /** + * get the genotypes sorted lexigraphically + * + * @return a list of the genotypes sorted lexi + */ + public List getLexigraphicallySortedGenotypes(); + + + /** + * get the read depth at this position + * + * @return the read depth, -1 if it is unknown + */ + public int getReadDepth(); + + /** + * add a genotype to the collection + * + * @param genotype + * + * @throws InvalidGenotypeException + */ + public void addGenotype(Genotype genotype) throws InvalidGenotypeException; + + /** + * get the root mean square (RMS) of the mapping qualities + * + * @return the RMS, or a value < 0 if it's not available + */ + public double getRMSMappingQuals(); + + /** + * create a variant, given the reference, and a lod score + * + * @param refBase the reference base + * @param score the threshold to use to determine if it's a variant or not + * + * @return a variant object, or null if no genotypes meet the criteria + */ + public Variant toGenotypeCall(char refBase, ConfidenceScore score); + +} + + +/** + * the following are helper Comparator classes for the above sort orders, that may be useful + * for anyone implementing the GenotypeLocus interface + */ +class PosteriorComparator implements Comparator { + private final Double EPSILON = 1.0e-15; + + @Override + public int compare(Genotype genotype, Genotype genotype1) { + double diff = genotype.getPosteriorProb() - genotype1.getPosteriorProb(); + if (Math.abs(diff) < (EPSILON * Math.abs(genotype.getPosteriorProb()))) + return 0; + else if (diff < 0) + return 1; + else + return -1; // TODO: BACKWARD NOW + } +} + +class LexigraphicalComparator implements Comparator { + private final Double EPSILON = 1.0e-15; + + @Override + public int compare(Genotype genotype, Genotype genotype1) { + return genotype.getBases().compareTo(genotype1.getBases()); + } +} + +class LikelihoodComparator implements Comparator { + private final Double EPSILON = 1.0e-15; + + @Override + public int compare(Genotype genotype, Genotype genotype1) { + double diff = genotype.getLikelihood() - genotype1.getLikelihood(); + if (Math.abs(diff) < (EPSILON * Math.abs(genotype.getLikelihood()))) + return 0; + else if (diff < 0) + return 1; // TODO: BACKWARD NOW + else + return -1; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusImpl.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusImpl.java new file mode 100644 index 000000000..eab555781 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusImpl.java @@ -0,0 +1,133 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + + +/** + * @author aaron + *

+ * Class GenotypeBucket + *

+ * A descriptions should go here. Blame aaron if it's missing. + */ +public class GenotypeLocusImpl implements GenotypeLocus { + + private final List mGenotypes = new ArrayList(); + private GenomeLoc mLocation = null; + private int mReadDepth = -1; + private double mRMSMappingQual = -1; + + public GenotypeLocusImpl(GenomeLoc location, int readDepth, double rmsMappingQual) { + this.mLocation = location; + mReadDepth = readDepth; + mRMSMappingQual = rmsMappingQual; + } + + /** + * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base + * is located right after the specified location + * + * @return position on the genome wrapped in GenomeLoc object + */ + @Override + public GenomeLoc getLocation() { + return mLocation; + } + + /** + * get the ploidy at this locus + * + * @return an integer representing the genotype ploidy at this location + */ + @Override + public int getPloidy() { + return 2; + } + + /** + * get the genotypes, sorted in asscending order by their likelihoods (the best + * to the worst likelihoods) + * + * @return a list of the likelihoods + */ + @Override + public List getGenotypes() { + Collections.sort(this.mGenotypes, new LikelihoodComparator()); + return this.mGenotypes; + } + + + + /** + * get the genotypes and their posteriors + * + * @return a list of the poseriors + */ + @Override + public List getPosteriors() { + Collections.sort(this.mGenotypes, new PosteriorComparator()); + return this.mGenotypes; + } + + /** + * get the genotypes sorted lexigraphically + * + * @return a list of the genotypes sorted lexi + */ + @Override + public List getLexigraphicallySortedGenotypes() { + Collections.sort(this.mGenotypes, new LexigraphicalComparator()); + return this.mGenotypes; + } + + /** + * get the read depth at this position + * + * @return the read depth, -1 if it is unknown + */ + @Override + public int getReadDepth() { + return mReadDepth; + } + + + + /** + * add a genotype to the collection + * + * @param genotype + * + * @throws InvalidGenotypeException + */ + @Override + public void addGenotype(Genotype genotype) throws InvalidGenotypeException { + this.mGenotypes.add(genotype); + } + + /** + * get the root mean square (RMS) of the mapping qualities + * + * @return the RMS, or a value < 0 if it's not available + */ + @Override + public double getRMSMappingQuals() { + return mRMSMappingQual; + } + + /** + * create a variant, given the reference, and a lod score + * + * @param refBase the reference base + * @param score the threshold to use to determine if it's a variant or not + * + * @return a variant object, or null if no genotypes meet the criteria + */ + @Override + public Variant toGenotypeCall(char refBase, ConfidenceScore score) { + return null; //To change body of implemented methods use File | Settings | File Templates. + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 22966f8ae..78a191b6d 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.utils.genotype; -import net.sf.samtools.SAMSequenceRecord; - /* * Copyright (c) 2009 The Broad Institute * @@ -37,50 +35,17 @@ import net.sf.samtools.SAMSequenceRecord; public interface GenotypeWriter { /** - * add a single point genotype call to the - * - * @param contig the contig you're calling in - * @param position the position on the contig - * @param referenceBase the reference base - * @param readDepth the read depth at the specified position - * @param likelihoods the likelihoods of each of the possible alleles + * Add a genotype, given a genotype locus + * @param locus the locus to add */ - public void addGenotypeCall(SAMSequenceRecord contig, - int position, - float rmsMapQuals, - char referenceBase, - int readDepth, - LikelihoodObject likelihoods); - - /** - * add a variable length call to the genotyper - * - * @param contig the contig you're calling in - * @param position the position on the genome - * @param rmsMapQuals the root mean square of the mapping qualities - * @param readDepth the read depth - * @param refBase the reference base - * @param firstHomZyg the first homozygous indel - * @param secondHomZyg the second homozygous indel (if present, null if not) - * @param hetLikelihood the heterozygous likelihood - */ - public void addVariableLengthCall(SAMSequenceRecord contig, - int position, - float rmsMapQuals, - int readDepth, - char refBase, - IndelLikelihood firstHomZyg, - IndelLikelihood secondHomZyg, - byte hetLikelihood); + public void addGenotypeCall(GenotypeCall locus); /** * add a no call to the genotype file, if supported. * - * @param position - * @param readDepth + * @param position the position to add the no call at */ - public void addNoCall(int position, - int readDepth); + public void addNoCall(int position); /** finish writing, closing any open files. */ public void close(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index bbca086b7..1dff1dff4 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils.genotype; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.geli.GeliAdapter; +import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; import java.io.File; @@ -17,7 +19,7 @@ import java.io.File; public class GenotypeWriterFactory { /** available genotype writers */ public enum GENOTYPE_FORMAT { - GELI, GLF, GFF, TABULAR; + GELI, GLF, GFF, TABULAR, GELI_BINARY; } /** @@ -32,6 +34,8 @@ public class GenotypeWriterFactory { case GLF: return new GLFWriter(header.toString(), destination); case GELI: + return new GeliTextWriter(destination); + case GELI_BINARY: return new GeliAdapter(destination, header); default: throw new StingException("Genotype writer " + format.toString() + " is not implemented"); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/InvalidGenotypeException.java b/java/src/org/broadinstitute/sting/utils/genotype/InvalidGenotypeException.java new file mode 100644 index 000000000..45c013d8e --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/InvalidGenotypeException.java @@ -0,0 +1,20 @@ +package org.broadinstitute.sting.utils.genotype; + + +/** + * + * @author aaron + * + * Class GenotypeException + * + * This exception is thrown when a genotype call is passed in that cannot be processed, i.e. invalid. + */ +public class InvalidGenotypeException extends Exception { + public InvalidGenotypeException(String msg) { + super(msg); + } + + public InvalidGenotypeException(String message, Throwable throwable) { + super(message, throwable); + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/TabularLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/TabularLFWriter.java index 83b7c9d4c..0fe66affd 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/TabularLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/TabularLFWriter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.utils.genotype; -import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.utils.StingException; import java.io.File; @@ -37,55 +36,33 @@ public class TabularLFWriter implements GenotypeWriter { } /** - * add a single point genotype call to the + * Add a genotype, given a genotype locus * - * @param contig the contig you're calling in - * @param position the position on the contig - * @param referenceBase the reference base - * @param readDepth the read depth at the specified position - * @param likelihoods the likelihoods of each of the possible alleles + * @param locus the locus to add */ @Override - public void addGenotypeCall(SAMSequenceRecord contig, int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods) { - /**return String.format("%s %s %c %c %s %f %f %f %f %d %s", - location, - contig.getSpecies(), - ref, - alt, + public void addGenotypeCall(GenotypeCall locus) { + /*outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s", + locus.getLocation(), + "NOT OUTPUTED", + locus.getReferencebase(), + locus.getGenotypes().get(1).getBases()., genotype(), qhat, qstar, lodVsRef, lodVsNextBest, depth, - bases);*/ - } - - /** - * add a variable length call to the genotyper - * - * @param contig the contig you're calling in - * @param position the position on the genome - * @param rmsMapQuals the root mean square of the mapping qualities - * @param readDepth the read depth - * @param refBase the reference base - * @param firstHomZyg the first homozygous indel - * @param secondHomZyg the second homozygous indel (if present, null if not) - * @param hetLikelihood the heterozygous likelihood - */ - @Override - public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) { - throw new StingException("TabularLFWriter doesn't support variable length calls"); + bases); */ } /** * add a no call to the genotype file, if supported. * * @param position - * @param readDepth */ @Override - public void addNoCall(int position, int readDepth) { + public void addNoCall(int position) { throw new StingException("TabularLFWriter doesn't support no-calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Variant.java b/java/src/org/broadinstitute/sting/utils/genotype/Variant.java new file mode 100644 index 000000000..0a06ac1f2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/Variant.java @@ -0,0 +1,11 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author aaron + *

+ * Interface Variant + *

+ * This class represents a variant + */ +public interface Variant { +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java similarity index 87% rename from java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java rename to java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 1903c0d02..e273cbe96 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -1,8 +1,12 @@ -package org.broadinstitute.sting.utils.genotype; +package org.broadinstitute.sting.utils.genotype.geli; import edu.mit.broad.picard.genotype.geli.GeliFileWriter; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.utils.genotype.GenotypeCall; +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.IndelLikelihood; +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import java.io.File; @@ -65,7 +69,6 @@ public class GeliAdapter implements GenotypeWriter { * @param readDepth the read depth at the specified position * @param likelihoods the likelihoods of each of the possible alleles */ - @Override public void addGenotypeCall(SAMSequenceRecord contig, int position, float rmsMapQuals, @@ -87,19 +90,27 @@ public class GeliAdapter implements GenotypeWriter { * @param secondHomZyg the second homozygous indel (if present, null if not) * @param hetLikelihood the heterozygous likelihood */ - @Override public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) { throw new UnsupportedOperationException("Geli format does not support variable length allele calls"); } + /** + * Add a genotype, given a genotype locus + * + * @param locus the locus to add + */ + @Override + public void addGenotypeCall(GenotypeCall locus) { + //To change body of implemented methods use File | Settings | File Templates. + } + /** * add a no call to the genotype file, if supported. * * @param position - * @param readDepth */ @Override - public void addNoCall(int position, int readDepth) { + public void addNoCall(int position) { throw new UnsupportedOperationException("Geli format does not support no-calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java new file mode 100644 index 000000000..643607594 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -0,0 +1,93 @@ +package org.broadinstitute.sting.utils.genotype.geli; + +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.GenotypeCall; +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintWriter; +import java.util.List; + + +/** + * + * @author aaron + * + * Class GeliTextWriter + * + * A descriptions should go here. Blame aaron if it's missing. + */ +public class GeliTextWriter implements GenotypeWriter { + // where we write to + PrintWriter mWriter; + + /** + * create a geli text writer + * @param file + */ + public GeliTextWriter(File file) { + try { + mWriter = new PrintWriter(file); + } catch (FileNotFoundException e) { + throw new StingException("Unable to open file " + file.toURI()); + } + mWriter.println("#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT"); + } + + /** + * Add a genotype, given a genotype locus + * + * @param locus the locus to add + */ + public void addGenotypeCall(GenotypeCall locus) { + if (locus.getPosteriors().size() != 10) throw new IllegalArgumentException("Geli text only supports SNP calls, with a diploid organism (i.e. posterior array size of 10)"); + + + // this is to perserve the format string that we used to use + double[] likelihoods = new double[10]; + int index = 0; + List lt = locus.getLexigraphicallySortedGenotypes(); + for (Genotype G: lt) { + likelihoods[index] = G.getLikelihood(); + index++; + } + + mWriter.println( String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", + locus.getLocation().getContig(), + locus.getLocation().getStart(), + locus.getReferencebase(), + locus.getReadDepth(), + -1, + locus.getGenotypes().get(0).getBases(), + locus.getBestVrsRef().second.getScore(), + locus.getBestVrsNext().second.getScore(), + likelihoods[0], + likelihoods[1], + likelihoods[2], + likelihoods[3], + likelihoods[4], + likelihoods[5], + likelihoods[6], + likelihoods[7], + likelihoods[8], + likelihoods[9])); + } + + /** + * add a no call to the genotype file, if supported. + * + * @param position the position to add the no call at + */ + @Override + public void addNoCall(int position) { + throw new UnsupportedOperationException("Geli text format doesn't support a no-call call."); + } + + /** finish writing, closing any open files. */ + @Override + public void close() { + mWriter.close(); + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/gff/GFFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/gff/GFFWriter.java deleted file mode 100644 index d3756a530..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/gff/GFFWriter.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.gff; - -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; -import org.broadinstitute.sting.utils.genotype.IndelLikelihood; -import net.sf.samtools.SAMSequenceRecord; - - -/** - * @author aaron - *

- * Class GFFWriter - *

- * A descriptions should go here. Blame aaron if it's missing. - */ -public class GFFWriter implements GenotypeWriter { - - /** - * add a single point genotype call to the file - * - * @param contig the contig you're calling in - * @param position the position on the contig - * @param referenceBase the reference base - * @param readDepth the read depth at the specified position - * @param likelihoods the likelihoods of each of the possible alleles - */ - @Override - public void addGenotypeCall(SAMSequenceRecord contig, int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods) { - //To change body of implemented methods use File | Settings | File Templates. - } - - /** - * add a variable length call to the genotype file - * - * @param contig the contig you're calling in - * @param position the position on the genome - * @param rmsMapQuals the root mean square of the mapping qualities - * @param readDepth the read depth - * @param refBase the reference base - * @param firstHomZyg the first homozygous indel - * @param secondHomZyg the second homozygous indel (if present, null if not) - * @param hetLikelihood the heterozygous likelihood - */ - @Override - public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) { - //To change body of implemented methods use File | Settings | File Templates. - } - - /** - * add a no call to the genotype file, if supported. - * - * @param position the position - * @param readDepth the read depth - */ - @Override - public void addNoCall(int position, int readDepth) { - //To change body of implemented methods use File | Settings | File Templates. - } - - - /** finish writing, closing any open files. */ - @Override - public void close() { - //To change body of implemented methods use File | Settings | File Templates. - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 5da2ed0e8..e58a75f79 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -1,14 +1,14 @@ package org.broadinstitute.sting.utils.genotype.glf; +import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BlockCompressedOutputStream; -import net.sf.samtools.SAMSequenceRecord; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.IndelLikelihood; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.genotype.*; import java.io.DataOutputStream; import java.io.File; +import java.util.List; /* * Copyright (c) 2009 The Broad Institute * @@ -80,7 +80,6 @@ public class GLFWriter implements GenotypeWriter { * @param rmsMapQ the root mean square of the mapping quality * @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods */ - @Override public void addGenotypeCall(SAMSequenceRecord contig, int genomicLoc, float rmsMapQ, @@ -100,6 +99,27 @@ public class GLFWriter implements GenotypeWriter { call.write(this.outputBinaryCodec); } + /** + * Add a genotype, given a genotype locus + * + * @param locus + */ + @Override + public void addGenotypeCall(GenotypeCall locus) { + //TODO: CODEWORD + // this is to perserve the format string that we used to use + double[] posteriors = new double[10]; + int index = 0; + List lt = locus.getLexigraphicallySortedGenotypes(); + for (Genotype G: lt) { + posteriors[index] = G.getLikelihood(); + index++; + } + + LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG); + this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRMSMappingQuals(),locus.getReferencebase(),locus.getReadDepth(),obj); + } + /** * add a variable length (indel, deletion, etc) to the genotype writer * @@ -112,7 +132,6 @@ public class GLFWriter implements GenotypeWriter { * @param secondHomZyg the second homozygous call * @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255 */ - @Override public void addVariableLengthCall(SAMSequenceRecord contig, int genomicLoc, float rmsMapQ, @@ -145,10 +164,10 @@ public class GLFWriter implements GenotypeWriter { * add a no call to the genotype file, if supported. * * @param position the position - * @param readDepth the read depth + * */ @Override - public void addNoCall(int position, int readDepth) { + public void addNoCall(int position) { // glf doesn't support this operation throw new UnsupportedOperationException("GLF doesn't support a 'no call' call."); } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java index 1e4bf2601..e66000fd0 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java @@ -5,9 +5,7 @@ import net.sf.picard.reference.ReferenceSequenceFileFactory; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; -import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; import org.junit.Assert; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -190,7 +188,7 @@ public class RodGLFTest extends BaseTest { /** * create the example glf file for the test, you can uncomment the above test line to have this * test run, regenerating the file. - */ + * public void createRodFile() { GenotypeWriter writer = new GLFWriter("", new File("glfTestFile.glf")); int location = 1; @@ -199,7 +197,7 @@ public class RodGLFTest extends BaseTest { writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 2, 20, 'A', 5, createLikelihood('T')); writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 3, 20, 'A', 5, createLikelihood('C')); writer.close(); - } + }*/ /** * create a likelihood object, given the appropriate reference base diff --git a/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java deleted file mode 100644 index ccd4953d7..000000000 --- a/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java +++ /dev/null @@ -1,73 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMSequenceRecord; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.junit.Test; - -import java.io.File; - - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * - * @author aaron - * - * Class GeliAdapterTest - * - * Tests the GeliAdapter class - */ -public class GeliAdapterTest extends BaseTest { - - - // private our Geli adapter - private GenotypeWriter adapter = null; - - /** - * test out the likelihood object - */ - @Test - public void test1() { - File fl = new File("testFile.txt"); - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10); - adapter = new GeliAdapter(fl,header); - LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); - SAMSequenceRecord rec = new SAMSequenceRecord("chr1",10); - adapter.addGenotypeCall(rec,100,100,'A',100,obj); - adapter.close(); - } - - - public double[] createFakeLikelihoods() { - double ret[] = new double[10]; - for (int x = 0; x < 10; x++) { - ret[x] = (double)(10.0-x) * 10.0; - } - return ret; - } -}