UG now outputs the FORMAT header fields when there's genotype data.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2294 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-09 16:31:07 +00:00
parent 12c49ea485
commit 4e54b91ce4
3 changed files with 27 additions and 9 deletions

View File

@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import java.io.File;
import java.util.*;
@ -152,19 +153,30 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
private Set<String> getHeaderInfo() {
Set<String> headerInfo = new HashSet<String>();
// this is only applicable to VCF
if ( UAC.VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF )
return headerInfo;
// first, the basic info
headerInfo.add("source=UnifiedGenotyper");
headerInfo.add("reference=" + getToolkit().getArguments().referenceFile.getName());
// annotation (INFO) fields from VariantAnnotator
if ( UAC.ALL_ANNOTATIONS )
headerInfo.addAll(VariantAnnotator.getAllVCFAnnotationDescriptions());
else
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
// annotation (INFO) fields from UnifiedGenotyper
headerInfo.add("INFO=AF,1,Float,\"Allele Frequency\"");
headerInfo.add("INFO=NS,1,Integer,\"Number of Samples With Data\"");
if ( !UAC.NO_SLOD )
headerInfo.add("INFO=SB,1,Float,\"Strand Bias\"");
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED )
headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings());
// TODO -- clean this up
headerInfo.add("UG_genotype_model=" + UAC.genotypeModel);
headerInfo.add("UG_base_model=" + UAC.baseModel);
headerInfo.add("UG_heterozygosity=" + UAC.heterozygosity);

View File

@ -3,10 +3,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
@ -264,4 +261,13 @@ public class VCFGenotypeRecord implements Genotype {
// result = String.valueOf(MISSING_HAPLOTYPE_QUALITY);
return result;
}
/**
 * Builds the header strings describing the genotype FORMAT fields written by
 * this record type: the genotype itself, the genotype quality and the read depth.
 *
 * @return a newly allocated, mutable set holding one "FORMAT=..." string per field
 */
public static Set<String> getSupportedHeaderStrings() {
    // One entry per field, laid out as "FORMAT=<key>,<count>,<type>,<quoted description>".
    // No FORMAT line is emitted for HAPLOTYPE_QUALITY_KEY at present.
    final String[] formatLines = {
        "FORMAT=" + GENOTYPE_KEY + ",1,String,\"Genotype\"",
        "FORMAT=" + GENOTYPE_QUALITY_KEY + ",1,Integer,\"Genotype Quality\"",
        "FORMAT=" + DEPTH_KEY + ",1,Integer,\"Read Depth (without MQ0 reads)\""
    };

    Set<String> headerStrings = new HashSet<String>();
    for ( String formatLine : formatLines )
        headerStrings.add(formatLine);
    return headerStrings;
}
}

View File

@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
/**
 * Integration test: runs the UnifiedGenotyper walker with the EM_POINT_ESTIMATE
 * genotype model and the empirical base model on interval 1:10,023,400-10,024,000
 * of the low-coverage CEU BAM, then passes the spec to executeTest.
 */
public void testMultiSamplePilot1PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
// NOTE(review): the next two lines appear to be the removed and the added expected value
// from this diff hunk; only the second should exist in the committed file.
// The 32-hex-digit strings are presumably MD5 digests of the expected output -- confirm in WalkerTest.
Arrays.asList("7c27fc1ad1fedab6944d5f93094ef914"));
Arrays.asList("c7aec953aac01101f0556db166178590"));
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
}
@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
/**
 * Integration test: runs the UnifiedGenotyper walker with the EM_POINT_ESTIMATE
 * genotype model and the empirical base model on interval 20:10,000,000-10,010,000
 * of the pilot2 daughters BAM, then passes the spec to executeTest.
 */
public void testMultiSamplePilot2PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
// NOTE(review): the next two lines appear to be the removed and the added expected value
// from this diff hunk; only the second should exist in the committed file.
// The 32-hex-digit strings are presumably MD5 digests of the expected output -- confirm in WalkerTest.
Arrays.asList("cd08c5e4dbd0cf477efd1204c681c7c6"));
Arrays.asList("e03b5ac5821dc0e0da3cc8a1f686ec6b"));
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
}
@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
/**
 * Integration test: runs the UnifiedGenotyper walker with the JOINT_ESTIMATE
 * genotype model and the empirical base model on interval 1:10,022,000-10,025,000
 * of the low-coverage CEU BAM, then passes the spec to executeTest.
 */
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
// NOTE(review): the next two lines appear to be the removed and the added expected value
// from this diff hunk; only the second should exist in the committed file.
// The 32-hex-digit strings are presumably MD5 digests of the expected output -- confirm in WalkerTest.
Arrays.asList("f4e8721a905e42f511967a1cfdb966ec"));
Arrays.asList("d1076029884939017817061cfd06d8ff"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
/**
 * Integration test: runs the UnifiedGenotyper walker with the JOINT_ESTIMATE
 * genotype model and the empirical base model on interval 20:10,000,000-10,050,000
 * of the pilot2 daughters BAM, then passes the spec to executeTest.
 */
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
// NOTE(review): the next two lines appear to be the removed and the added expected value
// from this diff hunk; only the second should exist in the committed file.
// The 32-hex-digit strings are presumably MD5 digests of the expected output -- confirm in WalkerTest.
Arrays.asList("962838ae188b1c07a35ead671019062e"));
Arrays.asList("58422eb06214282fc6e9efbf52937e1e"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
/**
 * Integration test: runs the UnifiedGenotyper walker with the JOINT_ESTIMATE
 * genotype model and the empirical base model on interval 1:10,000,000-10,100,000
 * of the NA12878 SLX BAM, then passes the spec to executeTest.
 */
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
// NOTE(review): the next two lines appear to be the removed and the added expected value
// from this diff hunk; only the second should exist in the committed file.
// The 32-hex-digit strings are presumably MD5 digests of the expected output -- confirm in WalkerTest.
Arrays.asList("a9cf1abecf709de9f158f6ffe57b00e7"));
Arrays.asList("27fadaf914ad7fbc83d4f8367f43bc40"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}