GELI_BINARY is now functional, and can be used as a variant type in SSG (-vf=GELI_BINARY). Also fixed the max mapping quality column in both GELI output formats, we haven't been correctly outputing up until now.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1774 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
225b9bccc1
commit
f9a0eefe4b
|
|
@ -36,9 +36,9 @@ public interface GenotypeWriter {
|
|||
|
||||
/**
|
||||
* Add a genotype, given a genotype locus
|
||||
* @param locus the locus to add
|
||||
* @param call the locus to add
|
||||
*/
|
||||
public void addGenotypeCall(Genotype locus);
|
||||
public void addGenotypeCall(Genotype call);
|
||||
|
||||
/**
|
||||
* add a no call to the genotype file, if supported.
|
||||
|
|
|
|||
|
|
@ -195,7 +195,7 @@ public class LikelihoodObject {
|
|||
*
|
||||
* @return a GenotypeLikelihoods object representing our data
|
||||
*/
|
||||
public GenotypeLikelihoods convert(SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase) {
|
||||
public GenotypeLikelihoods convertToGenotypeLikelihoods(SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase) {
|
||||
double[] ft = toDoubleArray();
|
||||
float[] db = new float[ft.length];
|
||||
int index = 0;
|
||||
|
|
@ -207,6 +207,9 @@ public class LikelihoodObject {
|
|||
for (; index < ft.length; index++) {
|
||||
db[index] = (float) Math.log(ft[index]);
|
||||
}
|
||||
} else {
|
||||
for (int x = 0; x < ft.length; x++)
|
||||
db[x] = (float)ft[x];
|
||||
}
|
||||
return new GenotypeLikelihoods(samHeader, seqIndex, seqPosition, refBase, db);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
|
||||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -9,6 +11,7 @@ import org.broadinstitute.sting.utils.genotype.*;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/*
|
||||
|
|
@ -68,11 +71,17 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
* @param referenceBase the reference base
|
||||
* @param likelihoods the likelihoods of each of the possible alleles
|
||||
*/
|
||||
public void addGenotypeCall(SAMSequenceRecord contig,
|
||||
private void addGenotypeCall(SAMSequenceRecord contig,
|
||||
int position,
|
||||
char referenceBase,
|
||||
double maxMappingQuality,
|
||||
int readCount,
|
||||
LikelihoodObject likelihoods) {
|
||||
writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte) referenceBase));
|
||||
GenotypeLikelihoods lk = likelihoods.convertToGenotypeLikelihoods(writer.getFileHeader(), contig.getSequenceIndex(), position, (byte) referenceBase);
|
||||
lk.setNumReads(readCount);
|
||||
|
||||
lk.setMaxMappingQuality(maxMappingQuality > Short.MAX_VALUE ? (short)Short.MAX_VALUE : (short)Math.round(maxMappingQuality));
|
||||
writer.addGenotypeLikelihoods(lk);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -110,13 +119,22 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
|
||||
}
|
||||
char ref = locus.getReference();
|
||||
|
||||
|
||||
int readCount = 0;
|
||||
double maxMappingQual = 0;
|
||||
if (locus instanceof ReadBacked) {
|
||||
List<SAMRecord> recs = ((ReadBacked)locus).getReads();
|
||||
readCount = recs.size();
|
||||
for (SAMRecord rec : recs) {
|
||||
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
||||
}
|
||||
}
|
||||
SSGenotypeCall call = (SSGenotypeCall)locus;
|
||||
LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),
|
||||
(int)locus.getLocation().getStart(),
|
||||
ref,
|
||||
maxMappingQual,
|
||||
readCount,
|
||||
obj);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
|
@ -10,6 +11,7 @@ import java.io.FileNotFoundException;
|
|||
import java.io.PrintStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -57,9 +59,7 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
|
||||
char ref = locus.getReference();
|
||||
|
||||
if (locus instanceof ReadBacked) {
|
||||
readDepth = ((ReadBacked)locus).getReadCount();
|
||||
}
|
||||
|
||||
if (!(locus instanceof GenotypesBacked)) {
|
||||
posteriors = new double[10];
|
||||
Arrays.fill(posteriors, 0.0);
|
||||
|
|
@ -74,14 +74,20 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
nextVrsRef = lks[9] - posteriors[index];
|
||||
}
|
||||
}
|
||||
// we have to calcuate our own
|
||||
|
||||
mWriter.println(String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
|
||||
double maxMappingQual = 0;
|
||||
if (locus instanceof ReadBacked) {
|
||||
List<SAMRecord> recs = ((ReadBacked)locus).getReads();
|
||||
readDepth = recs.size();
|
||||
for (SAMRecord rec : recs) {
|
||||
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
||||
}
|
||||
}
|
||||
mWriter.println(String.format("%s %16d %c %8d %.0f %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
|
||||
locus.getLocation().getContig(),
|
||||
locus.getLocation().getStart(),
|
||||
ref,
|
||||
readDepth,
|
||||
-1,
|
||||
maxMappingQual,
|
||||
locus.getBases(),
|
||||
nextVrsRef,
|
||||
nextVrsBest,
|
||||
|
|
|
|||
|
|
@ -17,9 +17,9 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
return baseTestString() + " --variant_output_format GELI -lod 5";
|
||||
}
|
||||
|
||||
private static String OneMb1StateMD5 = "d5404668e76f206055f03d97162ea6d9";
|
||||
private static String OneMb3StateMD5 = "46fb7b66da3dac341e9c342f751d74cd";
|
||||
private static String OneMbEmpiricalMD5 = "ea0be2fd074a6c824a0670ad5b3e0aca";
|
||||
private static String OneMb1StateMD5 = "ca300adf2442d8874b4f13819547a7fb";
|
||||
private static String OneMb3StateMD5 = "45caee5f71a30c7175c7389b594369b1";
|
||||
private static String OneMbEmpiricalMD5 = "c8602b745b5c024f09938a9f87f923cf";
|
||||
|
||||
// private static String oneMbMD5(BaseMismatchModel m) {
|
||||
// switch (m) {
|
||||
|
|
@ -50,7 +50,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -m empirical",
|
||||
1,
|
||||
Arrays.asList("b8975b303952edff3b0273165ba91001"));
|
||||
Arrays.asList("09f7ea954a44e68b80726147aee10944"));
|
||||
|
||||
executeTest(String.format("testMultiTechnologies"), spec);
|
||||
}
|
||||
|
|
@ -85,7 +85,7 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
public void genotypeTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-10,100,000 -m empirical --genotype", 1,
|
||||
Arrays.asList("7e5dec6481bbbc890493925da9a8f691"));
|
||||
Arrays.asList("b4c6d84ea14f9c34b04b799d3edd199a"));
|
||||
executeTest("genotypeTest", spec);
|
||||
}
|
||||
|
||||
|
|
@ -144,8 +144,8 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testLOD() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 10.0, "e4c51dca6f1fa999f4399b7412829534" );
|
||||
e.put( 3.0, "d804c24d49669235e3660e92e664ba1a" );
|
||||
e.put( 10.0, "3c6c40551c42f693736dab5e29f7a76c" );
|
||||
e.put( 3.0, "5c82a81f5919b29058f9ca031c00b7ef" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -163,9 +163,9 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "ca9986b32aac0d6ad6058f4bf10e7df2" );
|
||||
e.put( 0.0001, "55d4e3e73215b70b22a8e689a4e16d37" );
|
||||
e.put( 1.0 / 1850, "1ae2126f1a6490d6edd15d95bce726c4" );
|
||||
e.put( 0.01, "c6fc697d31cfda563145138126561c77" );
|
||||
e.put( 0.0001, "1a1466b7599c60fad75d0513a0c8496e" );
|
||||
e.put( 1.0 / 1850, "5758dbcdea158fb053fcea22b06befe8" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -174,4 +174,16 @@ public class SingleSampleGenotyperIntegrationTest extends WalkerTest {
|
|||
executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* test the output of a binary geli file
|
||||
*/
|
||||
@Test
|
||||
public void empirical1MbTestBinaryGeli() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseTestString() + " -L 1:10,000,000-11,000,000 -m empirical --variant_output_format GELI_BINARY -lod 5", 1,
|
||||
Arrays.asList("17e9fc04b2a05cafc53562997c28e127"));
|
||||
executeTest("empirical1MbTest", spec);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue