From f9a0eefe4bd06474dc010e1a6793d611b2b1e1fd Mon Sep 17 00:00:00 2001 From: aaron Date: Wed, 7 Oct 2009 18:20:34 +0000 Subject: [PATCH] GELI_BINARY is now functional, and can be used as a variant type in SSG (-vf=GELI_BINARY). Also fixed the max mapping quality column in both GELI output formats, we haven't been correctly outputing up until now. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1774 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/utils/genotype/GenotypeWriter.java | 4 +-- .../utils/genotype/LikelihoodObject.java | 5 ++- .../utils/genotype/geli/GeliAdapter.java | 26 +++++++++++--- .../utils/genotype/geli/GeliTextWriter.java | 22 +++++++----- .../SingleSampleGenotyperIntegrationTest.java | 34 +++++++++++++------ 5 files changed, 65 insertions(+), 26 deletions(-) diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 20c8f12a8..90d3210b8 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -36,9 +36,9 @@ public interface GenotypeWriter { /** * Add a genotype, given a genotype locus - * @param locus the locus to add + * @param call the locus to add */ - public void addGenotypeCall(Genotype locus); + public void addGenotypeCall(Genotype call); /** * add a no call to the genotype file, if supported. diff --git a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java index d9d7c3b3b..abe910b41 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java @@ -195,7 +195,7 @@ public class LikelihoodObject { * * @return a GenotypeLikelihoods object representing our data */ - public GenotypeLikelihoods convert(SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase) { + public GenotypeLikelihoods convertToGenotypeLikelihoods(SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase) { double[] ft = toDoubleArray(); float[] db = new float[ft.length]; int index = 0; @@ -207,6 +207,9 @@ public class LikelihoodObject { for (; index < ft.length; index++) { db[index] = (float) Math.log(ft[index]); } + } else { + for (int x = 0; x < ft.length; x++) + db[x] = (float)ft[x]; } return new GenotypeLikelihoods(samHeader, seqIndex, seqPosition, refBase, db); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 2a6a26750..c56ecfa87 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -1,7 +1,9 @@ package org.broadinstitute.sting.utils.genotype.geli; import edu.mit.broad.picard.genotype.geli.GeliFileWriter; +import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -9,6 +11,7 @@ import org.broadinstitute.sting.utils.genotype.*; import java.io.File; import java.util.Arrays; +import java.util.List; /* @@ -68,11 +71,17 @@ public class GeliAdapter implements GenotypeWriter { * @param referenceBase the reference base * @param likelihoods the likelihoods of each of the possible alleles */ - public void addGenotypeCall(SAMSequenceRecord contig, + private void addGenotypeCall(SAMSequenceRecord contig, int position, char referenceBase, + double maxMappingQuality, + int readCount, LikelihoodObject likelihoods) { - writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte) referenceBase)); + GenotypeLikelihoods lk = likelihoods.convertToGenotypeLikelihoods(writer.getFileHeader(), contig.getSequenceIndex(), position, (byte) referenceBase); + lk.setNumReads(readCount); + + lk.setMaxMappingQuality(maxMappingQuality > Short.MAX_VALUE ? (short)Short.MAX_VALUE : (short)Math.round(maxMappingQuality)); + writer.addGenotypeLikelihoods(lk); } /** @@ -110,13 +119,22 @@ public class GeliAdapter implements GenotypeWriter { } char ref = locus.getReference(); - - + int readCount = 0; + double maxMappingQual = 0; + if (locus instanceof ReadBacked) { + List recs = ((ReadBacked)locus).getReads(); + readCount = recs.size(); + for (SAMRecord rec : recs) { + if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality(); + } + } SSGenotypeCall call = (SSGenotypeCall)locus; LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()), (int)locus.getLocation().getStart(), ref, + maxMappingQual, + readCount, obj); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index cdb9b766c..58727e134 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -1,6 +1,7 @@ + package org.broadinstitute.sting.utils.genotype.geli; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.*; @@ -10,6 +11,7 @@ import java.io.FileNotFoundException; import java.io.PrintStream; import java.io.PrintWriter; import java.util.Arrays; +import java.util.List; /** @@ -57,9 +59,7 @@ public class GeliTextWriter implements GenotypeWriter { char ref = locus.getReference(); - if (locus instanceof ReadBacked) { - readDepth = ((ReadBacked)locus).getReadCount(); - } + if (!(locus instanceof GenotypesBacked)) { posteriors = new double[10]; Arrays.fill(posteriors, 0.0); @@ -74,14 +74,20 @@ public class GeliTextWriter implements GenotypeWriter { nextVrsRef = lks[9] - posteriors[index]; } } - // we have to calcuate our own - - mWriter.println(String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", + double maxMappingQual = 0; + if (locus instanceof ReadBacked) { + List recs = ((ReadBacked)locus).getReads(); + readDepth = recs.size(); + for (SAMRecord rec : recs) { + if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality(); + } + } + mWriter.println(String.format("%s %16d %c %8d %.0f %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", locus.getLocation().getContig(), locus.getLocation().getStart(), ref, readDepth, - -1, + maxMappingQual, locus.getBases(), nextVrsRef, nextVrsBest, diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyperIntegrationTest.java index 43c65354e..526cb50c7 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyperIntegrationTest.java @@ -17,9 +17,9 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest { return baseTestString() + " --variant_output_format GELI -lod 5"; } - private static String OneMb1StateMD5 = "d5404668e76f206055f03d97162ea6d9"; - private static String OneMb3StateMD5 = "46fb7b66da3dac341e9c342f751d74cd"; - private static String OneMbEmpiricalMD5 = "ea0be2fd074a6c824a0670ad5b3e0aca"; + private static String OneMb1StateMD5 = "ca300adf2442d8874b4f13819547a7fb"; + private static String OneMb3StateMD5 = "45caee5f71a30c7175c7389b594369b1"; + private static String OneMbEmpiricalMD5 = "c8602b745b5c024f09938a9f87f923cf"; // private static String oneMbMD5(BaseMismatchModel m) { // switch (m) { @@ -50,7 +50,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -m empirical", 1, - Arrays.asList("b8975b303952edff3b0273165ba91001")); + Arrays.asList("09f7ea954a44e68b80726147aee10944")); executeTest(String.format("testMultiTechnologies"), spec); } @@ -85,7 +85,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest { public void genotypeTest() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( testGeliLod5() + " -L 1:10,000,000-10,100,000 -m empirical --genotype", 1, - Arrays.asList("7e5dec6481bbbc890493925da9a8f691")); + Arrays.asList("b4c6d84ea14f9c34b04b799d3edd199a")); executeTest("genotypeTest", spec); } @@ -144,8 +144,8 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest { @Test public void testLOD() { HashMap e = new HashMap(); - e.put( 10.0, "e4c51dca6f1fa999f4399b7412829534" ); - e.put( 3.0, "d804c24d49669235e3660e92e664ba1a" ); + e.put( 10.0, "3c6c40551c42f693736dab5e29f7a76c" ); + e.put( 3.0, "5c82a81f5919b29058f9ca031c00b7ef" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -163,9 +163,9 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "ca9986b32aac0d6ad6058f4bf10e7df2" ); - e.put( 0.0001, "55d4e3e73215b70b22a8e689a4e16d37" ); - e.put( 1.0 / 1850, "1ae2126f1a6490d6edd15d95bce726c4" ); + e.put( 0.01, "c6fc697d31cfda563145138126561c77" ); + e.put( 0.0001, "1a1466b7599c60fad75d0513a0c8496e" ); + e.put( 1.0 / 1850, "5758dbcdea158fb053fcea22b06befe8" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -174,4 +174,16 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest { executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec); } } -} \ No newline at end of file + + /** + * test the output of a binary geli file + */ + @Test + public void empirical1MbTestBinaryGeli() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + baseTestString() + " -L 1:10,000,000-11,000,000 -m empirical --variant_output_format GELI_BINARY -lod 5", 1, + Arrays.asList("17e9fc04b2a05cafc53562997c28e127")); + executeTest("empirical1MbTest", spec); + } + +}