Thanks to Guillermo, I found a bug in the Unified Genotyper output: GL was posteriors instead of likelihoods. Not a huge deal because the
priors were flat, but fixed nonetheless. Also, needed to update Tribble. Minor updates to the Beagle input maker. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3461 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4e268ef6ac
commit
ffeb3fd80d
|
|
@ -58,7 +58,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
// Don't allow mixed types for now
|
||||
private EnumSet<VariantContext.Type> ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL);
|
||||
|
||||
private String[] ALLOWED_FORMAT_FIELDS = {VCFGenotypeRecord.GENOTYPE_KEY, VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, VCFGenotypeRecord.DEPTH_KEY, VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY };
|
||||
private String[] ALLOWED_FORMAT_FIELDS = {VCFGenotypeRecord.GENOTYPE_KEY, VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, VCFGenotypeRecord.DEPTH_KEY, VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY };
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
|
||||
|
|
|
|||
|
|
@ -156,13 +156,13 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
cg.setReadBackedPileup(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup());
|
||||
cg.putAttribute(VCFGenotypeRecord.DEPTH_KEY, contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size());
|
||||
|
||||
double[] posteriors = GLs.get(sample).getPosteriors();
|
||||
cg.setPosteriors(posteriors);
|
||||
cg.setPosteriors(GLs.get(sample).getPosteriors());
|
||||
double[] likelihoods = GLs.get(sample).getLikelihoods();
|
||||
String GL = String.format("%.2f,%.2f,%.2f",
|
||||
posteriors[refGenotype.ordinal()],
|
||||
posteriors[hetGenotype.ordinal()],
|
||||
posteriors[homGenotype.ordinal()]);
|
||||
cg.putAttribute(VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY, GL);
|
||||
likelihoods[refGenotype.ordinal()],
|
||||
likelihoods[hetGenotype.ordinal()],
|
||||
likelihoods[homGenotype.ordinal()]);
|
||||
cg.putAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY, GL);
|
||||
|
||||
calls.put(sample, cg);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,10 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
|
|
@ -37,34 +36,32 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
|||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input VCF fiel
|
||||
*/
|
||||
* Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input VCF file
|
||||
*/
|
||||
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class))
|
||||
public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||
|
||||
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
|
||||
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = true)
|
||||
public PrintStream beagleWriter = null;
|
||||
|
||||
final TreeSet<String> samples = new TreeSet<String>();
|
||||
|
||||
|
||||
|
||||
public void initialize() {
|
||||
|
||||
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
|
||||
for ( final ReferenceOrderedDataSource source : dataSources ) {
|
||||
final RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getType().equals(VCFCodec.class) ) {
|
||||
if ( rod.getType().equals(VCFRecord.class) ) {
|
||||
final VCFReader reader = new VCFReader(rod.getFile());
|
||||
final Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
|
||||
samples.addAll(vcfSamples);
|
||||
|
|
@ -72,15 +69,11 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
beagleWriter.print("marker alleleA alleleB");
|
||||
for ( String sample : samples )
|
||||
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
|
||||
|
||||
if ( beagleWriter != null ) {
|
||||
beagleWriter.print("marker alleleA alleleB");
|
||||
for ( String sample : samples )
|
||||
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
|
||||
|
||||
beagleWriter.println();
|
||||
}
|
||||
|
||||
beagleWriter.println();
|
||||
}
|
||||
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
|
||||
|
|
@ -91,7 +84,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
|
||||
try {
|
||||
vc_eval = tracker.getVariantContext(ref,"eval", vc, loc, true);
|
||||
vc_eval = tracker.getVariantContext(ref,"vcf", vc, loc, true);
|
||||
} catch (java.util.NoSuchElementException e) {
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -110,23 +103,23 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
beagleWriter.print(String.format("%s ", bglPrintString));
|
||||
}
|
||||
|
||||
Map<String, Genotype> genotypes = vc_eval.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
if ( !vc_eval.hasGenotypes() )
|
||||
return null;
|
||||
|
||||
Map<String, Genotype> genotypes = vc_eval.getGenotypes();
|
||||
for ( String sample : samples ) {
|
||||
// use sample as key into genotypes structure
|
||||
Genotype genotype = genotypes.get(sample);
|
||||
String gls = "0 0 0 ";
|
||||
if (genotype.isCalled()) {
|
||||
String[] glArray = genotype.getAttributeAsString("GL").split(",");
|
||||
if (genotype.isCalled() && genotype.hasAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
String[] glArray = genotype.getAttributeAsString(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY).split(",");
|
||||
|
||||
for (String gl : glArray) {
|
||||
Double d_gl = -Double.valueOf(gl);
|
||||
Double d_gl = Math.pow(10, Double.valueOf(gl));
|
||||
beagleWriter.print(String.format("%5.2f ",d_gl));
|
||||
}
|
||||
}
|
||||
else
|
||||
beagleWriter.print(gls); // write 0 likelihoods for uncalled genotypes.
|
||||
beagleWriter.print("0 0 0 "); // write 0 likelihoods for uncalled genotypes.
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -159,9 +159,9 @@ public class TrioGenotyperWalker extends RefWalker<VariantContext, Integer>{
|
|||
* @return
|
||||
*/
|
||||
private double genotypeL( List<Allele> alleles, Genotype genotypeCall ) {
|
||||
String postTriplet = (String)genotypeCall.getAttribute(VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY);
|
||||
String postTriplet = (String)genotypeCall.getAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY);
|
||||
if ( postTriplet == null )
|
||||
throw new StingException("BUG: TrioGenotyperWalker expected genotype likelihood triplets " + VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY);
|
||||
throw new StingException("BUG: TrioGenotyperWalker expected genotype likelihood triplets " + VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY);
|
||||
|
||||
// calculate the offset -- AA => 0, AB => 1, BB => 2
|
||||
int i = 0;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("3f919977584b2c15273cb7de948dd1ac"));
|
||||
Arrays.asList("d9d99c7ce4ea63a907183893de1dd905"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("d45934ae6537a84bb123db2288ad235e"));
|
||||
Arrays.asList("23dfd7747ec6149e59abd753d2a8b00c"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("96245f0842534527b66a9daad53134bd"));
|
||||
Arrays.asList("5ad0d5e8180f5290f511723392a54d03"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -63,7 +63,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testParallelization() {
|
||||
String md5 = "638911a5d7b155076afc79a3d3f50548";
|
||||
String md5 = "c7bfe0500e933d1546cb40448d7b4c61";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -85,11 +85,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-genotype", "d301500f0cd5ef1929a1adce7e857f9c" );
|
||||
e.put( "-all_bases", "70e94c6671a3e5122cb9ad04c0ad138e" );
|
||||
e.put( "--min_base_quality_score 26", "c508728f9d26cc3a8b73f3a5ee4275f2" );
|
||||
e.put( "--min_mapping_quality_score 26", "34634ea8f196eaf40dac6e069ebcf09a" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "92903b97b43145c24f19a899ce127aa3" );
|
||||
e.put( "-genotype", "0ac7ab893a3f550cb1b8c34f28baedf6" );
|
||||
e.put( "-all_bases", "40520c3020f6abcb60e8b632d9614554" );
|
||||
e.put( "--min_base_quality_score 26", "ecc1b0dd618eae9b9f62db2742ac3306" );
|
||||
e.put( "--min_mapping_quality_score 26", "75bd53d5070f1146350d633321a165e3" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "8e1236b7f0f6c19a0be808b2d44e3255" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -103,12 +103,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("cdcc3977ce78e15d7c8153eaf5f9a2df"));
|
||||
Arrays.asList("ff8a1008ddfd342a20b3e032bade61bc"));
|
||||
executeTest("testConfidence1", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("2413aafdcfd3d8ac1169662f72524035"));
|
||||
Arrays.asList("d469b4367188dd2b9be37ab8effc65c4"));
|
||||
executeTest("testConfidence2", spec2);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +0,0 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="org.broad" module="tribble" revision="86" status="integration" publication="20100526124200" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="org.broad" module="tribble" revision="87" status="integration" publication="20100528124200" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue