Thanks to Guillermo, I found a bug in the Unified Genotyper output: GL was posteriors instead of likelihoods. Not a huge deal because the

priors were flat, but fixed nonetheless.
Also, needed to update Tribble.
Minor updates to the Beagle input maker.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3461 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-05-28 19:28:26 +00:00
parent 4e268ef6ac
commit ffeb3fd80d
8 changed files with 44 additions and 51 deletions

View File

@ -58,7 +58,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
// Don't allow mixed types for now
private EnumSet<VariantContext.Type> ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL);
private String[] ALLOWED_FORMAT_FIELDS = {VCFGenotypeRecord.GENOTYPE_KEY, VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, VCFGenotypeRecord.DEPTH_KEY, VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY };
private String[] ALLOWED_FORMAT_FIELDS = {VCFGenotypeRecord.GENOTYPE_KEY, VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, VCFGenotypeRecord.DEPTH_KEY, VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY };
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )

View File

@ -156,13 +156,13 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
cg.setReadBackedPileup(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup());
cg.putAttribute(VCFGenotypeRecord.DEPTH_KEY, contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size());
double[] posteriors = GLs.get(sample).getPosteriors();
cg.setPosteriors(posteriors);
cg.setPosteriors(GLs.get(sample).getPosteriors());
double[] likelihoods = GLs.get(sample).getLikelihoods();
String GL = String.format("%.2f,%.2f,%.2f",
posteriors[refGenotype.ordinal()],
posteriors[hetGenotype.ordinal()],
posteriors[homGenotype.ordinal()]);
cg.putAttribute(VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY, GL);
likelihoods[refGenotype.ordinal()],
likelihoods[hetGenotype.ordinal()],
likelihoods[homGenotype.ordinal()]);
cg.putAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY, GL);
calls.put(sample, cg);
}

View File

@ -25,10 +25,9 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.vcf.VCFRecord;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
@ -37,34 +36,32 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import java.io.PrintStream;
import java.util.*;
/**
* Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input VCF fiel
*/
* Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input VCF file
*/
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class))
public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = true)
public PrintStream beagleWriter = null;
final TreeSet<String> samples = new TreeSet<String>();
public void initialize() {
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
for ( final ReferenceOrderedDataSource source : dataSources ) {
final RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getType().equals(VCFCodec.class) ) {
if ( rod.getType().equals(VCFRecord.class) ) {
final VCFReader reader = new VCFReader(rod.getFile());
final Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
samples.addAll(vcfSamples);
@ -72,15 +69,11 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
}
}
beagleWriter.print("marker alleleA alleleB");
for ( String sample : samples )
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
if ( beagleWriter != null ) {
beagleWriter.print("marker alleleA alleleB");
for ( String sample : samples )
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
beagleWriter.println();
}
beagleWriter.println();
}
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
@ -91,7 +84,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
try {
vc_eval = tracker.getVariantContext(ref,"eval", vc, loc, true);
vc_eval = tracker.getVariantContext(ref,"vcf", vc, loc, true);
} catch (java.util.NoSuchElementException e) {
return 0;
}
@ -110,23 +103,23 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
beagleWriter.print(String.format("%s ", bglPrintString));
}
Map<String, Genotype> genotypes = vc_eval.getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
if ( !vc_eval.hasGenotypes() )
return null;
Map<String, Genotype> genotypes = vc_eval.getGenotypes();
for ( String sample : samples ) {
// use sample as key into genotypes structure
Genotype genotype = genotypes.get(sample);
String gls = "0 0 0 ";
if (genotype.isCalled()) {
String[] glArray = genotype.getAttributeAsString("GL").split(",");
if (genotype.isCalled() && genotype.hasAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY)) {
String[] glArray = genotype.getAttributeAsString(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY).split(",");
for (String gl : glArray) {
Double d_gl = -Double.valueOf(gl);
Double d_gl = Math.pow(10, Double.valueOf(gl));
beagleWriter.print(String.format("%5.2f ",d_gl));
}
}
else
beagleWriter.print(gls); // write 0 likelihoods for uncalled genotypes.
beagleWriter.print("0 0 0 "); // write 0 likelihoods for uncalled genotypes.
}

View File

@ -159,9 +159,9 @@ public class TrioGenotyperWalker extends RefWalker<VariantContext, Integer>{
* @return
*/
private double genotypeL( List<Allele> alleles, Genotype genotypeCall ) {
String postTriplet = (String)genotypeCall.getAttribute(VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY);
String postTriplet = (String)genotypeCall.getAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY);
if ( postTriplet == null )
throw new StingException("BUG: TrioGenotyperWalker expected genotype likelihood triplets " + VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY);
throw new StingException("BUG: TrioGenotyperWalker expected genotype likelihood triplets " + VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY);
// calculate the offset -- AA => 0, AB => 1, BB => 2
int i = 0;

View File

@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("3f919977584b2c15273cb7de948dd1ac"));
Arrays.asList("d9d99c7ce4ea63a907183893de1dd905"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1,
Arrays.asList("d45934ae6537a84bb123db2288ad235e"));
Arrays.asList("23dfd7747ec6149e59abd753d2a8b00c"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("96245f0842534527b66a9daad53134bd"));
Arrays.asList("5ad0d5e8180f5290f511723392a54d03"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}
@ -63,7 +63,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParallelization() {
String md5 = "638911a5d7b155076afc79a3d3f50548";
String md5 = "c7bfe0500e933d1546cb40448d7b4c61";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
@ -85,11 +85,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "d301500f0cd5ef1929a1adce7e857f9c" );
e.put( "-all_bases", "70e94c6671a3e5122cb9ad04c0ad138e" );
e.put( "--min_base_quality_score 26", "c508728f9d26cc3a8b73f3a5ee4275f2" );
e.put( "--min_mapping_quality_score 26", "34634ea8f196eaf40dac6e069ebcf09a" );
e.put( "--max_mismatches_in_40bp_window 5", "92903b97b43145c24f19a899ce127aa3" );
e.put( "-genotype", "0ac7ab893a3f550cb1b8c34f28baedf6" );
e.put( "-all_bases", "40520c3020f6abcb60e8b632d9614554" );
e.put( "--min_base_quality_score 26", "ecc1b0dd618eae9b9f62db2742ac3306" );
e.put( "--min_mapping_quality_score 26", "75bd53d5070f1146350d633321a165e3" );
e.put( "--max_mismatches_in_40bp_window 5", "8e1236b7f0f6c19a0be808b2d44e3255" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -103,12 +103,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("cdcc3977ce78e15d7c8153eaf5f9a2df"));
Arrays.asList("ff8a1008ddfd342a20b3e032bade61bc"));
executeTest("testConfidence1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("2413aafdcfd3d8ac1169662f72524035"));
Arrays.asList("d469b4367188dd2b9be37ab8effc65c4"));
executeTest("testConfidence2", spec2);
}

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="86" status="integration" publication="20100526124200" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="87" status="integration" publication="20100528124200" />
</ivy-module>