3 changes to UG which break integration tests:
1. emit AA,AB,BB likelihoods in the FORMAT field for Mark 2. remove constraint that genotype alleles (in the GT field) need to be lexigraphically sorted. 3. Add bam file(s) used by genotyper to header for Kiran git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2963 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
aa7191353a
commit
5a20bf0e64
|
|
@ -162,8 +162,7 @@ public class VariantContextAdaptors {
|
|||
alleles.add(refAllele);
|
||||
for ( String alt : vcf.getAlternateAlleleList() ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
// todo -- cleanup
|
||||
System.out.printf("Excluding vcf record %s%n", vcf);
|
||||
//System.out.printf("Excluding vcf record %s%n", vcf);
|
||||
return null;
|
||||
}
|
||||
alleles.add(new Allele(alt, false));
|
||||
|
|
@ -288,15 +287,7 @@ public class VariantContextAdaptors {
|
|||
for ( Genotype g : vc.getGenotypesSortedByName() ) {
|
||||
List<VCFGenotypeEncoding> encodings = new ArrayList<VCFGenotypeEncoding>(g.getPloidy());
|
||||
|
||||
// TODO -- REMOVE ME ONCE INTEGRATION TESTS PASS!!!
|
||||
ArrayList<Allele> temporaryList = new ArrayList<Allele>(g.getAlleles());
|
||||
Collections.sort(temporaryList, new Comparator<Allele>() {
|
||||
public int compare(Allele a1, Allele a2) {
|
||||
return a1.toString().charAt(0) - a2.toString().charAt(0);
|
||||
}
|
||||
});
|
||||
for ( Allele a : temporaryList ) {
|
||||
//for ( Allele a : g.getAlleles() ) {
|
||||
for ( Allele a : g.getAlleles() ) {
|
||||
encodings.add(alleleMap.get(a.toString()));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
|
|
@ -93,6 +94,9 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt);
|
||||
Allele refAllele = new Allele(Character.toString(ref), true);
|
||||
Allele altAllele = new Allele(Character.toString(alt), false);
|
||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(ref, alt);
|
||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
|
||||
|
||||
for ( String sample : GLs.keySet() ) {
|
||||
|
||||
|
|
@ -113,17 +117,21 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
|
||||
CalledGenotype cg = new CalledGenotype(sample, myAlleles, AFbasedGenotype.second);
|
||||
cg.setLikelihoods(GLs.get(sample).getLikelihoods());
|
||||
cg.setPosteriors(GLs.get(sample).getPosteriors());
|
||||
cg.setReadBackedPileup(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup());
|
||||
|
||||
double[] posteriors = GLs.get(sample).getPosteriors();
|
||||
cg.setPosteriors(posteriors);
|
||||
String GL = String.format("%.2f,%.2f,%.2f",
|
||||
posteriors[refGenotype.ordinal()],
|
||||
posteriors[hetGenotype.ordinal()],
|
||||
posteriors[homGenotype.ordinal()]);
|
||||
cg.putAttribute(VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY, GL);
|
||||
|
||||
calls.put(sample, cg);
|
||||
}
|
||||
|
||||
// output to beagle file if requested
|
||||
if ( beagleWriter != null ) {
|
||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(ref, alt);
|
||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
|
||||
for ( String sample : samples ) {
|
||||
GenotypeLikelihoods gl = GLs.get(sample);
|
||||
if ( gl == null ) {
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.utils.genotype.vcf.*;
|
|||
|
||||
import java.util.*;
|
||||
import java.io.PrintStream;
|
||||
import java.io.File;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -139,7 +140,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
if ( !UAC.NO_SLOD )
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
|
||||
|
||||
// FORMAT fields if not in POOLED mode
|
||||
// FORMAT and INFO fields if not in POOLED mode
|
||||
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
|
||||
headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings());
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_COUNT_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
|
||||
|
|
@ -153,6 +154,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
Map<String,String> commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(args);
|
||||
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() )
|
||||
headerInfo.add(new VCFHeaderLine(String.format("UG_%s", commandLineArg.getKey()), commandLineArg.getValue()));
|
||||
// also, the list of input bams
|
||||
for ( File file : getToolkit().getArguments().samFiles )
|
||||
headerInfo.add(new VCFHeaderLine("UG_bam_file_used", file.getName()));
|
||||
|
||||
return headerInfo;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,16 +6,18 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* This class emcompasses all the basic information about a genotype. It is immutable.
|
||||
* This class emcompasses all the basic information about a called genotype.
|
||||
*
|
||||
* @author ebanks
|
||||
*/
|
||||
public class CalledGenotype extends MutableGenotype {
|
||||
|
||||
// key names for standard genotype attributes
|
||||
public static final String LIKELIHOODS_ATTRIBUTE_KEY = "Likelihoods";
|
||||
public static final String POSTERIORS_ATTRIBUTE_KEY = "Posteriors";
|
||||
public static final String READBACKEDPILEUP_ATTRIBUTE_KEY = "ReadBackedPileup";
|
||||
|
||||
|
||||
public CalledGenotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean genotypesArePhased) {
|
||||
super(sampleName, alleles, negLog10PError, filters, attributes, genotypesArePhased);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ public class VCFGenotypeRecord {
|
|||
public static final String DEPTH_KEY = "DP";
|
||||
public static final String HAPLOTYPE_QUALITY_KEY = "HQ";
|
||||
public static final String GENOTYPE_FILTER_KEY = "FT";
|
||||
public static final String GENOTYPE_POSTERIORS_TRIPLET_KEY = "GL";
|
||||
public static final String OLD_DEPTH_KEY = "RD";
|
||||
|
||||
// the values for empty fields
|
||||
|
|
@ -313,6 +314,8 @@ public class VCFGenotypeRecord {
|
|||
result = String.valueOf(MISSING_DEPTH);
|
||||
else if ( field.equals(GENOTYPE_FILTER_KEY) )
|
||||
result = UNFILTERED;
|
||||
else if ( field.equals(GENOTYPE_POSTERIORS_TRIPLET_KEY) )
|
||||
result = "0,0,0";
|
||||
// TODO -- support haplotype quality
|
||||
//else if ( field.equals(HAPLOTYPE_QUALITY_KEY) )
|
||||
// result = String.valueOf(MISSING_HAPLOTYPE_QUALITY);
|
||||
|
|
@ -324,6 +327,7 @@ public class VCFGenotypeRecord {
|
|||
result.add(new VCFFormatHeaderLine(GENOTYPE_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(GENOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Float, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
result.add(new VCFFormatHeaderLine(GENOTYPE_POSTERIORS_TRIPLET_KEY, 3, VCFFormatHeaderLine.INFO_TYPE.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
|
||||
//result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality"));
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import java.util.*;
|
|||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @author aaron, ebanks
|
||||
* <p/>
|
||||
* Class VCFGenotypeWriterAdapter
|
||||
* <p/>
|
||||
|
|
@ -33,7 +33,8 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
|
|||
// standard genotype format strings
|
||||
private static String[] standardGenotypeFormatStrings = { VCFGenotypeRecord.GENOTYPE_KEY,
|
||||
VCFGenotypeRecord.DEPTH_KEY,
|
||||
VCFGenotypeRecord.GENOTYPE_QUALITY_KEY };
|
||||
VCFGenotypeRecord.GENOTYPE_QUALITY_KEY,
|
||||
VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY };
|
||||
|
||||
public VCFGenotypeWriterAdapter(File writeTo) {
|
||||
if (writeTo == null) throw new RuntimeException("VCF output file must not be null");
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testPooled1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
|
||||
Arrays.asList("5a86c53e8f0897a71ff74662c5696dc4"));
|
||||
Arrays.asList("c30af5d192661abd77b05a316f1d8923"));
|
||||
executeTest("testPooled1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("2f928b1261963044ca1781601cae4bf7"));
|
||||
Arrays.asList("882b2fae1cd1ba65cac3cadacec0ce2b"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("10fe265d0140243b52f500c3882230f2"));
|
||||
Arrays.asList("aa0cff414e6623c36465726a987a645d"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("5c455e1a33e3f82d13898b20ee71ac69"));
|
||||
Arrays.asList("53df224164083cc7d8ad85f3d16ba38f"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testParallelization() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 -nt 4", 1,
|
||||
Arrays.asList("c827c74e59263b0f6b526089b050c100"));
|
||||
Arrays.asList("3ade750c0d261594ea549db7b127a1e3"));
|
||||
executeTest("test parallelization", spec);
|
||||
}
|
||||
|
||||
|
|
@ -77,11 +77,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-genotype", "8818584bf7053ecf52844f3b404ab630" );
|
||||
e.put( "-all_bases", "bf6ea1c04bf52002e3365c1f47103d4a" );
|
||||
e.put( "--min_base_quality_score 26", "3f192cd301f057698d0bc6c41841ce81" );
|
||||
e.put( "--min_mapping_quality_score 26", "2631025243ac6b52df852309235ec8d3" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "e540a2057164e3b05a5d635805f1167e" );
|
||||
e.put( "-genotype", "bee9fa71d70fdde094ab30785d4fa84e" );
|
||||
e.put( "-all_bases", "410cff9d97cd017becd1f6260c7abeeb" );
|
||||
e.put( "--min_base_quality_score 26", "85e1c35d3926afc68761aefea3f41332" );
|
||||
e.put( "--min_mapping_quality_score 26", "1c49a7d5e6ad295c0450b8a35053050f" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "7e7db5a0d859704e12a4b89d35065682" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -95,7 +95,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1,
|
||||
Arrays.asList("bfa93ee89aa0807b9c4e4793363452b4"));
|
||||
Arrays.asList("c67dd3e97cb188b117074d2c4692fcfa"));
|
||||
executeTest("testConfidence", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testOtherOutput() {
|
||||
String[] md5s = {"8c7dd53a402b727753002ebcd76168ac", "8cba0b8752f18fc620b4697840bc7291"};
|
||||
String[] md5s = {"ce0024816a092af9f998a7561ffb4fb2", "8cba0b8752f18fc620b4697840bc7291"};
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper" +
|
||||
" -R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
|
|
|
|||
Loading…
Reference in New Issue