diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index f015ab441..2879cd847 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -162,8 +162,7 @@ public class VariantContextAdaptors { alleles.add(refAllele); for ( String alt : vcf.getAlternateAlleleList() ) { if ( ! Allele.acceptableAlleleBases(alt) ) { - // todo -- cleanup - System.out.printf("Excluding vcf record %s%n", vcf); + //System.out.printf("Excluding vcf record %s%n", vcf); return null; } alleles.add(new Allele(alt, false)); @@ -288,15 +287,7 @@ public class VariantContextAdaptors { for ( Genotype g : vc.getGenotypesSortedByName() ) { List encodings = new ArrayList(g.getPloidy()); - // TODO -- REMOVE ME ONCE INTEGRATION TESTS PASS!!! - ArrayList temporaryList = new ArrayList(g.getAlleles()); - Collections.sort(temporaryList, new Comparator() { - public int compare(Allele a1, Allele a2) { - return a1.toString().charAt(0) - a2.toString().charAt(0); - } - }); - for ( Allele a : temporaryList ) { - //for ( Allele a : g.getAlleles() ) { + for ( Allele a : g.getAlleles() ) { encodings.add(alleleMap.get(a.toString())); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index dda4f8c95..e57eebe1d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.CalledGenotype; +import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; @@ -93,6 +94,9 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt); Allele refAllele = new Allele(Character.toString(ref), true); Allele altAllele = new Allele(Character.toString(alt), false); + DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref); + DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(ref, alt); + DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt); for ( String sample : GLs.keySet() ) { @@ -113,17 +117,21 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul CalledGenotype cg = new CalledGenotype(sample, myAlleles, AFbasedGenotype.second); cg.setLikelihoods(GLs.get(sample).getLikelihoods()); - cg.setPosteriors(GLs.get(sample).getPosteriors()); cg.setReadBackedPileup(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()); + double[] posteriors = GLs.get(sample).getPosteriors(); + cg.setPosteriors(posteriors); + String GL = String.format("%.2f,%.2f,%.2f", + posteriors[refGenotype.ordinal()], + posteriors[hetGenotype.ordinal()], + posteriors[homGenotype.ordinal()]); + cg.putAttribute(VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY, GL); + calls.put(sample, cg); } // output to beagle file if requested if ( beagleWriter != null ) { - DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref); - DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(ref, alt); - DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt); for ( String sample : samples ) { GenotypeLikelihoods gl = GLs.get(sample); if ( gl == null ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index d8662b6ac..b02b1649a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -37,6 +37,7 @@ import org.broadinstitute.sting.utils.genotype.vcf.*; import java.util.*; import java.io.PrintStream; +import java.io.File; /** @@ -139,7 +140,7 @@ public class UnifiedGenotyper extends LocusWalker commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(args); for ( Map.Entry commandLineArg : commandLineArgs.entrySet() ) headerInfo.add(new VCFHeaderLine(String.format("UG_%s", commandLineArg.getKey()), commandLineArg.getValue())); + // also, the list of input bams + for ( File file : getToolkit().getArguments().samFiles ) + headerInfo.add(new VCFHeaderLine("UG_bam_file_used", file.getName())); return headerInfo; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java index 809caee1c..b36abc6fb 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java @@ -6,16 +6,18 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.*; /** - * This class emcompasses all the basic information about a genotype. It is immutable. + * This class emcompasses all the basic information about a called genotype. * * @author ebanks */ public class CalledGenotype extends MutableGenotype { + // key names for standard genotype attributes public static final String LIKELIHOODS_ATTRIBUTE_KEY = "Likelihoods"; public static final String POSTERIORS_ATTRIBUTE_KEY = "Posteriors"; public static final String READBACKEDPILEUP_ATTRIBUTE_KEY = "ReadBackedPileup"; + public CalledGenotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean genotypesArePhased) { super(sampleName, alleles, negLog10PError, filters, attributes, genotypesArePhased); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index 1975f1732..2a9050f4d 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -19,6 +19,7 @@ public class VCFGenotypeRecord { public static final String DEPTH_KEY = "DP"; public static final String HAPLOTYPE_QUALITY_KEY = "HQ"; public static final String GENOTYPE_FILTER_KEY = "FT"; + public static final String GENOTYPE_POSTERIORS_TRIPLET_KEY = "GL"; public static final String OLD_DEPTH_KEY = "RD"; // the values for empty fields @@ -313,6 +314,8 @@ public class VCFGenotypeRecord { result = String.valueOf(MISSING_DEPTH); else if ( field.equals(GENOTYPE_FILTER_KEY) ) result = UNFILTERED; + else if ( field.equals(GENOTYPE_POSTERIORS_TRIPLET_KEY) ) + result = "0,0,0"; // TODO -- support haplotype quality //else if ( field.equals(HAPLOTYPE_QUALITY_KEY) ) // result = String.valueOf(MISSING_HAPLOTYPE_QUALITY); @@ -324,6 +327,7 @@ public class VCFGenotypeRecord { result.add(new VCFFormatHeaderLine(GENOTYPE_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.String, "Genotype")); result.add(new VCFFormatHeaderLine(GENOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Float, "Genotype Quality")); result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Read Depth (only filtered reads used for calling)")); + result.add(new VCFFormatHeaderLine(GENOTYPE_POSTERIORS_TRIPLET_KEY, 3, VCFFormatHeaderLine.INFO_TYPE.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic")); //result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality")); return result; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 0c3654da5..cafc561cd 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -11,7 +11,7 @@ import java.util.*; /** - * @author aaron + * @author aaron, ebanks *

* Class VCFGenotypeWriterAdapter *

@@ -33,7 +33,8 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { // standard genotype format strings private static String[] standardGenotypeFormatStrings = { VCFGenotypeRecord.GENOTYPE_KEY, VCFGenotypeRecord.DEPTH_KEY, - VCFGenotypeRecord.GENOTYPE_QUALITY_KEY }; + VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, + VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY }; public VCFGenotypeWriterAdapter(File writeTo) { if (writeTo == null) throw new RuntimeException("VCF output file must not be null"); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 15cf965f9..926917f69 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -22,7 +22,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testPooled1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, - Arrays.asList("5a86c53e8f0897a71ff74662c5696dc4")); + Arrays.asList("c30af5d192661abd77b05a316f1d8923")); executeTest("testPooled1", spec); } @@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("2f928b1261963044ca1781601cae4bf7")); + Arrays.asList("882b2fae1cd1ba65cac3cadacec0ce2b")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("10fe265d0140243b52f500c3882230f2")); + Arrays.asList("aa0cff414e6623c36465726a987a645d")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("5c455e1a33e3f82d13898b20ee71ac69")); + Arrays.asList("53df224164083cc7d8ad85f3d16ba38f")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } @@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testParallelization() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 -nt 4", 1, - Arrays.asList("c827c74e59263b0f6b526089b050c100")); + Arrays.asList("3ade750c0d261594ea549db7b127a1e3")); executeTest("test parallelization", spec); } @@ -77,11 +77,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testParameter() { HashMap e = new HashMap(); - e.put( "-genotype", "8818584bf7053ecf52844f3b404ab630" ); - e.put( "-all_bases", "bf6ea1c04bf52002e3365c1f47103d4a" ); - e.put( "--min_base_quality_score 26", "3f192cd301f057698d0bc6c41841ce81" ); - e.put( "--min_mapping_quality_score 26", "2631025243ac6b52df852309235ec8d3" ); - e.put( "--max_mismatches_in_40bp_window 5", "e540a2057164e3b05a5d635805f1167e" ); + e.put( "-genotype", "bee9fa71d70fdde094ab30785d4fa84e" ); + e.put( "-all_bases", "410cff9d97cd017becd1f6260c7abeeb" ); + e.put( "--min_base_quality_score 26", "85e1c35d3926afc68761aefea3f41332" ); + e.put( "--min_mapping_quality_score 26", "1c49a7d5e6ad295c0450b8a35053050f" ); + e.put( "--max_mismatches_in_40bp_window 5", "7e7db5a0d859704e12a4b89d35065682" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -95,7 +95,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1, - Arrays.asList("bfa93ee89aa0807b9c4e4793363452b4")); + Arrays.asList("c67dd3e97cb188b117074d2c4692fcfa")); executeTest("testConfidence", spec); } @@ -106,7 +106,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- @Test public void testOtherOutput() { - String[] md5s = {"8c7dd53a402b727753002ebcd76168ac", "8cba0b8752f18fc620b4697840bc7291"}; + String[] md5s = {"ce0024816a092af9f998a7561ffb4fb2", "8cba0b8752f18fc620b4697840bc7291"}; WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper" + " -R " + oneKGLocation + "reference/human_b36_both.fasta" +