GELI_BINARY is now functional, and can be used as a variant type in SSG (-vf=GELI_BINARY). Also fixed the max mapping quality column in both GELI output formats, we haven't been correctly outputing up until now.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1774 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-10-07 18:20:34 +00:00
parent 225b9bccc1
commit f9a0eefe4b
5 changed files with 65 additions and 26 deletions

View File

@ -36,9 +36,9 @@ public interface GenotypeWriter {
/** /**
* Add a genotype, given a genotype locus * Add a genotype, given a genotype locus
* @param locus the locus to add * @param call the locus to add
*/ */
public void addGenotypeCall(Genotype locus); public void addGenotypeCall(Genotype call);
/** /**
* add a no call to the genotype file, if supported. * add a no call to the genotype file, if supported.

View File

@ -195,7 +195,7 @@ public class LikelihoodObject {
* *
* @return a GenotypeLikelihoods object representing our data * @return a GenotypeLikelihoods object representing our data
*/ */
public GenotypeLikelihoods convert(SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase) { public GenotypeLikelihoods convertToGenotypeLikelihoods(SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase) {
double[] ft = toDoubleArray(); double[] ft = toDoubleArray();
float[] db = new float[ft.length]; float[] db = new float[ft.length];
int index = 0; int index = 0;
@ -207,6 +207,9 @@ public class LikelihoodObject {
for (; index < ft.length; index++) { for (; index < ft.length; index++) {
db[index] = (float) Math.log(ft[index]); db[index] = (float) Math.log(ft[index]);
} }
} else {
for (int x = 0; x < ft.length; x++)
db[x] = (float)ft[x];
} }
return new GenotypeLikelihoods(samHeader, seqIndex, seqPosition, refBase, db); return new GenotypeLikelihoods(samHeader, seqIndex, seqPosition, refBase, db);
} }

View File

@ -1,7 +1,9 @@
package org.broadinstitute.sting.utils.genotype.geli; package org.broadinstitute.sting.utils.genotype.geli;
import edu.mit.broad.picard.genotype.geli.GeliFileWriter; import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall; import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
@ -9,6 +11,7 @@ import org.broadinstitute.sting.utils.genotype.*;
import java.io.File; import java.io.File;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
/* /*
@ -68,11 +71,17 @@ public class GeliAdapter implements GenotypeWriter {
* @param referenceBase the reference base * @param referenceBase the reference base
* @param likelihoods the likelihoods of each of the possible alleles * @param likelihoods the likelihoods of each of the possible alleles
*/ */
public void addGenotypeCall(SAMSequenceRecord contig, private void addGenotypeCall(SAMSequenceRecord contig,
int position, int position,
char referenceBase, char referenceBase,
double maxMappingQuality,
int readCount,
LikelihoodObject likelihoods) { LikelihoodObject likelihoods) {
writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte) referenceBase)); GenotypeLikelihoods lk = likelihoods.convertToGenotypeLikelihoods(writer.getFileHeader(), contig.getSequenceIndex(), position, (byte) referenceBase);
lk.setNumReads(readCount);
lk.setMaxMappingQuality(maxMappingQuality > Short.MAX_VALUE ? (short)Short.MAX_VALUE : (short)Math.round(maxMappingQuality));
writer.addGenotypeLikelihoods(lk);
} }
/** /**
@ -110,13 +119,22 @@ public class GeliAdapter implements GenotypeWriter {
} }
char ref = locus.getReference(); char ref = locus.getReference();
int readCount = 0;
double maxMappingQual = 0;
if (locus instanceof ReadBacked) {
List<SAMRecord> recs = ((ReadBacked)locus).getReads();
readCount = recs.size();
for (SAMRecord rec : recs) {
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
}
}
SSGenotypeCall call = (SSGenotypeCall)locus; SSGenotypeCall call = (SSGenotypeCall)locus;
LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()), this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),
(int)locus.getLocation().getStart(), (int)locus.getLocation().getStart(),
ref, ref,
maxMappingQual,
readCount,
obj); obj);
} }

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype.geli; package org.broadinstitute.sting.utils.genotype.geli;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
@ -10,6 +11,7 @@ import java.io.FileNotFoundException;
import java.io.PrintStream; import java.io.PrintStream;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
/** /**
@ -57,9 +59,7 @@ public class GeliTextWriter implements GenotypeWriter {
char ref = locus.getReference(); char ref = locus.getReference();
if (locus instanceof ReadBacked) {
readDepth = ((ReadBacked)locus).getReadCount();
}
if (!(locus instanceof GenotypesBacked)) { if (!(locus instanceof GenotypesBacked)) {
posteriors = new double[10]; posteriors = new double[10];
Arrays.fill(posteriors, 0.0); Arrays.fill(posteriors, 0.0);
@ -74,14 +74,20 @@ public class GeliTextWriter implements GenotypeWriter {
nextVrsRef = lks[9] - posteriors[index]; nextVrsRef = lks[9] - posteriors[index];
} }
} }
// we have to calcuate our own double maxMappingQual = 0;
if (locus instanceof ReadBacked) {
mWriter.println(String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", List<SAMRecord> recs = ((ReadBacked)locus).getReads();
readDepth = recs.size();
for (SAMRecord rec : recs) {
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
}
}
mWriter.println(String.format("%s %16d %c %8d %.0f %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
locus.getLocation().getContig(), locus.getLocation().getContig(),
locus.getLocation().getStart(), locus.getLocation().getStart(),
ref, ref,
readDepth, readDepth,
-1, maxMappingQual,
locus.getBases(), locus.getBases(),
nextVrsRef, nextVrsRef,
nextVrsBest, nextVrsBest,

View File

@ -17,9 +17,9 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
return baseTestString() + " --variant_output_format GELI -lod 5"; return baseTestString() + " --variant_output_format GELI -lod 5";
} }
private static String OneMb1StateMD5 = "d5404668e76f206055f03d97162ea6d9"; private static String OneMb1StateMD5 = "ca300adf2442d8874b4f13819547a7fb";
private static String OneMb3StateMD5 = "46fb7b66da3dac341e9c342f751d74cd"; private static String OneMb3StateMD5 = "45caee5f71a30c7175c7389b594369b1";
private static String OneMbEmpiricalMD5 = "ea0be2fd074a6c824a0670ad5b3e0aca"; private static String OneMbEmpiricalMD5 = "c8602b745b5c024f09938a9f87f923cf";
// private static String oneMbMD5(BaseMismatchModel m) { // private static String oneMbMD5(BaseMismatchModel m) {
// switch (m) { // switch (m) {
@ -50,7 +50,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" + " -L 1:10,000,000-10,100,000" +
" -m empirical", " -m empirical",
1, 1,
Arrays.asList("b8975b303952edff3b0273165ba91001")); Arrays.asList("09f7ea954a44e68b80726147aee10944"));
executeTest(String.format("testMultiTechnologies"), spec); executeTest(String.format("testMultiTechnologies"), spec);
} }
@ -85,7 +85,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
public void genotypeTest() { public void genotypeTest() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
testGeliLod5() + " -L 1:10,000,000-10,100,000 -m empirical --genotype", 1, testGeliLod5() + " -L 1:10,000,000-10,100,000 -m empirical --genotype", 1,
Arrays.asList("7e5dec6481bbbc890493925da9a8f691")); Arrays.asList("b4c6d84ea14f9c34b04b799d3edd199a"));
executeTest("genotypeTest", spec); executeTest("genotypeTest", spec);
} }
@ -144,8 +144,8 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testLOD() { public void testLOD() {
HashMap<Double, String> e = new HashMap<Double, String>(); HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 10.0, "e4c51dca6f1fa999f4399b7412829534" ); e.put( 10.0, "3c6c40551c42f693736dab5e29f7a76c" );
e.put( 3.0, "d804c24d49669235e3660e92e664ba1a" ); e.put( 3.0, "5c82a81f5919b29058f9ca031c00b7ef" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) { for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -163,9 +163,9 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testHeterozyosity() { public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>(); HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "ca9986b32aac0d6ad6058f4bf10e7df2" ); e.put( 0.01, "c6fc697d31cfda563145138126561c77" );
e.put( 0.0001, "55d4e3e73215b70b22a8e689a4e16d37" ); e.put( 0.0001, "1a1466b7599c60fad75d0513a0c8496e" );
e.put( 1.0 / 1850, "1ae2126f1a6490d6edd15d95bce726c4" ); e.put( 1.0 / 1850, "5758dbcdea158fb053fcea22b06befe8" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) { for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -174,4 +174,16 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec); executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec);
} }
} }
/**
* test the output of a binary geli file
*/
@Test
public void empirical1MbTestBinaryGeli() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseTestString() + " -L 1:10,000,000-11,000,000 -m empirical --variant_output_format GELI_BINARY -lod 5", 1,
Arrays.asList("17e9fc04b2a05cafc53562997c28e127"));
executeTest("empirical1MbTest", spec);
}
} }