GELI_BINARY is now functional, and can be used as a variant type in SSG (-vf=GELI_BINARY). Also fixed the max mapping quality column in both GELI output formats, we haven't been correctly outputing up until now.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1774 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-10-07 18:20:34 +00:00
parent 225b9bccc1
commit f9a0eefe4b
5 changed files with 65 additions and 26 deletions

View File

@ -36,9 +36,9 @@ public interface GenotypeWriter {
/**
* Add a genotype, given a genotype locus
* @param locus the locus to add
* @param call the locus to add
*/
public void addGenotypeCall(Genotype locus);
public void addGenotypeCall(Genotype call);
/**
* add a no call to the genotype file, if supported.

View File

@ -195,7 +195,7 @@ public class LikelihoodObject {
*
* @return a GenotypeLikelihoods object representing our data
*/
public GenotypeLikelihoods convert(SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase) {
public GenotypeLikelihoods convertToGenotypeLikelihoods(SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase) {
double[] ft = toDoubleArray();
float[] db = new float[ft.length];
int index = 0;
@ -207,6 +207,9 @@ public class LikelihoodObject {
for (; index < ft.length; index++) {
db[index] = (float) Math.log(ft[index]);
}
} else {
for (int x = 0; x < ft.length; x++)
db[x] = (float)ft[x];
}
return new GenotypeLikelihoods(samHeader, seqIndex, seqPosition, refBase, db);
}

View File

@ -1,7 +1,9 @@
package org.broadinstitute.sting.utils.genotype.geli;
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -9,6 +11,7 @@ import org.broadinstitute.sting.utils.genotype.*;
import java.io.File;
import java.util.Arrays;
import java.util.List;
/*
@ -68,11 +71,17 @@ public class GeliAdapter implements GenotypeWriter {
* @param referenceBase the reference base
* @param likelihoods the likelihoods of each of the possible alleles
*/
public void addGenotypeCall(SAMSequenceRecord contig,
private void addGenotypeCall(SAMSequenceRecord contig,
int position,
char referenceBase,
double maxMappingQuality,
int readCount,
LikelihoodObject likelihoods) {
writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte) referenceBase));
GenotypeLikelihoods lk = likelihoods.convertToGenotypeLikelihoods(writer.getFileHeader(), contig.getSequenceIndex(), position, (byte) referenceBase);
lk.setNumReads(readCount);
lk.setMaxMappingQuality(maxMappingQuality > Short.MAX_VALUE ? (short)Short.MAX_VALUE : (short)Math.round(maxMappingQuality));
writer.addGenotypeLikelihoods(lk);
}
/**
@ -110,13 +119,22 @@ public class GeliAdapter implements GenotypeWriter {
}
char ref = locus.getReference();
int readCount = 0;
double maxMappingQual = 0;
if (locus instanceof ReadBacked) {
List<SAMRecord> recs = ((ReadBacked)locus).getReads();
readCount = recs.size();
for (SAMRecord rec : recs) {
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
}
}
SSGenotypeCall call = (SSGenotypeCall)locus;
LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),
(int)locus.getLocation().getStart(),
ref,
maxMappingQual,
readCount,
obj);
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype.geli;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*;
@ -10,6 +11,7 @@ import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.List;
/**
@ -57,9 +59,7 @@ public class GeliTextWriter implements GenotypeWriter {
char ref = locus.getReference();
if (locus instanceof ReadBacked) {
readDepth = ((ReadBacked)locus).getReadCount();
}
if (!(locus instanceof GenotypesBacked)) {
posteriors = new double[10];
Arrays.fill(posteriors, 0.0);
@ -74,14 +74,20 @@ public class GeliTextWriter implements GenotypeWriter {
nextVrsRef = lks[9] - posteriors[index];
}
}
// we have to calcuate our own
mWriter.println(String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
double maxMappingQual = 0;
if (locus instanceof ReadBacked) {
List<SAMRecord> recs = ((ReadBacked)locus).getReads();
readDepth = recs.size();
for (SAMRecord rec : recs) {
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
}
}
mWriter.println(String.format("%s %16d %c %8d %.0f %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
locus.getLocation().getContig(),
locus.getLocation().getStart(),
ref,
readDepth,
-1,
maxMappingQual,
locus.getBases(),
nextVrsRef,
nextVrsBest,

View File

@ -17,9 +17,9 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
return baseTestString() + " --variant_output_format GELI -lod 5";
}
private static String OneMb1StateMD5 = "d5404668e76f206055f03d97162ea6d9";
private static String OneMb3StateMD5 = "46fb7b66da3dac341e9c342f751d74cd";
private static String OneMbEmpiricalMD5 = "ea0be2fd074a6c824a0670ad5b3e0aca";
private static String OneMb1StateMD5 = "ca300adf2442d8874b4f13819547a7fb";
private static String OneMb3StateMD5 = "45caee5f71a30c7175c7389b594369b1";
private static String OneMbEmpiricalMD5 = "c8602b745b5c024f09938a9f87f923cf";
// private static String oneMbMD5(BaseMismatchModel m) {
// switch (m) {
@ -50,7 +50,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -m empirical",
1,
Arrays.asList("b8975b303952edff3b0273165ba91001"));
Arrays.asList("09f7ea954a44e68b80726147aee10944"));
executeTest(String.format("testMultiTechnologies"), spec);
}
@ -85,7 +85,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
public void genotypeTest() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
testGeliLod5() + " -L 1:10,000,000-10,100,000 -m empirical --genotype", 1,
Arrays.asList("7e5dec6481bbbc890493925da9a8f691"));
Arrays.asList("b4c6d84ea14f9c34b04b799d3edd199a"));
executeTest("genotypeTest", spec);
}
@ -144,8 +144,8 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
@Test
public void testLOD() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 10.0, "e4c51dca6f1fa999f4399b7412829534" );
e.put( 3.0, "d804c24d49669235e3660e92e664ba1a" );
e.put( 10.0, "3c6c40551c42f693736dab5e29f7a76c" );
e.put( 3.0, "5c82a81f5919b29058f9ca031c00b7ef" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -163,9 +163,9 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "ca9986b32aac0d6ad6058f4bf10e7df2" );
e.put( 0.0001, "55d4e3e73215b70b22a8e689a4e16d37" );
e.put( 1.0 / 1850, "1ae2126f1a6490d6edd15d95bce726c4" );
e.put( 0.01, "c6fc697d31cfda563145138126561c77" );
e.put( 0.0001, "1a1466b7599c60fad75d0513a0c8496e" );
e.put( 1.0 / 1850, "5758dbcdea158fb053fcea22b06befe8" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -174,4 +174,16 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec);
}
}
}
/**
* test the output of a binary geli file
*/
@Test
public void empirical1MbTestBinaryGeli() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseTestString() + " -L 1:10,000,000-11,000,000 -m empirical --variant_output_format GELI_BINARY -lod 5", 1,
Arrays.asList("17e9fc04b2a05cafc53562997c28e127"));
executeTest("empirical1MbTest", spec);
}
}