UnifiedGenotyper (UG) now outputs the VCF FORMAT header fields when there's genotype data.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2294 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
12c49ea485
commit
4e54b91ce4
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.Pair;
|
|||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -152,19 +153,30 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
private Set<String> getHeaderInfo() {
|
||||
Set<String> headerInfo = new HashSet<String>();
|
||||
|
||||
// this is only applicable to VCF
|
||||
if ( UAC.VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF )
|
||||
return headerInfo;
|
||||
|
||||
// first, the basic info
|
||||
headerInfo.add("source=UnifiedGenotyper");
|
||||
headerInfo.add("reference=" + getToolkit().getArguments().referenceFile.getName());
|
||||
|
||||
// annotation (INFO) fields from VariantAnnotator
|
||||
if ( UAC.ALL_ANNOTATIONS )
|
||||
headerInfo.addAll(VariantAnnotator.getAllVCFAnnotationDescriptions());
|
||||
else
|
||||
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
|
||||
|
||||
// annotation (INFO) fields from UnifiedGenotyper
|
||||
headerInfo.add("INFO=AF,1,Float,\"Allele Frequency\"");
|
||||
headerInfo.add("INFO=NS,1,Integer,\"Number of Samples With Data\"");
|
||||
if ( !UAC.NO_SLOD )
|
||||
headerInfo.add("INFO=SB,1,Float,\"Strand Bias\"");
|
||||
|
||||
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED )
|
||||
headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings());
|
||||
|
||||
// TODO -- clean this up
|
||||
headerInfo.add("UG_genotype_model=" + UAC.genotypeModel);
|
||||
headerInfo.add("UG_base_model=" + UAC.baseModel);
|
||||
headerInfo.add("UG_heterozygosity=" + UAC.heterozygosity);
|
||||
|
|
|
|||
|
|
@ -3,10 +3,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -264,4 +261,13 @@ public class VCFGenotypeRecord implements Genotype {
|
|||
// result = String.valueOf(MISSING_HAPLOTYPE_QUALITY);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Builds the set of header strings describing the per-genotype FORMAT fields
 * declared here: genotype, genotype quality and read depth.
 * Each entry is assembled as FORMAT=[key],1,[type],[quoted description], where the
 * key comes from a constant of this class (GENOTYPE_KEY, GENOTYPE_QUALITY_KEY, DEPTH_KEY).
 * The haplotype quality entry is commented out below and is therefore not part of the result.
 *
 * @return a newly created HashSet containing the three FORMAT header strings
 */
public static Set<String> getSupportedHeaderStrings() {
|
||||
Set<String> result = new HashSet<String>();
|
||||
result.add("FORMAT=" + GENOTYPE_KEY + ",1,String,\"Genotype\"");
|
||||
result.add("FORMAT=" + GENOTYPE_QUALITY_KEY + ",1,Integer,\"Genotype Quality\"");
|
||||
result.add("FORMAT=" + DEPTH_KEY + ",1,Integer,\"Read Depth (without MQ0 reads)\"");
|
||||
//result.add("FORMAT=" + HAPLOTYPE_QUALITY_KEY + ",1,Integer,\"Haplotype Quality\"");
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// Builds a WalkerTestSpec that invokes UnifiedGenotyper with -gm EM_POINT_ESTIMATE and
// -bm empirical on interval 1:10,023,400-10,024,000 of the low_coverage_CEU BAM, then
// passes it to executeTest.
// NOTE(review): this is a rendered diff hunk, so two Arrays.asList(...) lines appear below;
// presumably the first is the expected value removed by the commit and the second is its
// replacement (the 32-hex-character strings look like MD5 digests) - confirm against the real file.
public void testMultiSamplePilot1PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("7c27fc1ad1fedab6944d5f93094ef914"));
|
||||
Arrays.asList("c7aec953aac01101f0556db166178590"));
|
||||
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// Builds a WalkerTestSpec that invokes UnifiedGenotyper with -gm EM_POINT_ESTIMATE and
// -bm empirical on interval 20:10,000,000-10,010,000 of the pilot2_daughters BAM, then
// passes it to executeTest.
// NOTE(review): this is a rendered diff hunk, so two Arrays.asList(...) lines appear below;
// presumably the first is the expected value removed by the commit and the second is its
// replacement (the 32-hex-character strings look like MD5 digests) - confirm against the real file.
public void testMultiSamplePilot2PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("cd08c5e4dbd0cf477efd1204c681c7c6"));
|
||||
Arrays.asList("e03b5ac5821dc0e0da3cc8a1f686ec6b"));
|
||||
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// Builds a WalkerTestSpec that invokes UnifiedGenotyper with -gm JOINT_ESTIMATE and
// -bm empirical on interval 1:10,022,000-10,025,000 of the low_coverage_CEU BAM, then
// passes it to executeTest.
// NOTE(review): this is a rendered diff hunk, so two Arrays.asList(...) lines appear below;
// presumably the first is the expected value removed by the commit and the second is its
// replacement (the 32-hex-character strings look like MD5 digests) - confirm against the real file.
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("f4e8721a905e42f511967a1cfdb966ec"));
|
||||
Arrays.asList("d1076029884939017817061cfd06d8ff"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// Builds a WalkerTestSpec that invokes UnifiedGenotyper with -gm JOINT_ESTIMATE and
// -bm empirical on interval 20:10,000,000-10,050,000 of the pilot2_daughters BAM, then
// passes it to executeTest.
// NOTE(review): this is a rendered diff hunk, so two Arrays.asList(...) lines appear below;
// presumably the first is the expected value removed by the commit and the second is its
// replacement (the 32-hex-character strings look like MD5 digests) - confirm against the real file.
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("962838ae188b1c07a35ead671019062e"));
|
||||
Arrays.asList("58422eb06214282fc6e9efbf52937e1e"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// Builds a WalkerTestSpec that invokes UnifiedGenotyper with -gm JOINT_ESTIMATE and
// -bm empirical on interval 1:10,000,000-10,100,000 of the NA12878 SLX BAM, then
// passes it to executeTest.
// NOTE(review): this is a rendered diff hunk, so two Arrays.asList(...) lines appear below;
// presumably the first is the expected value removed by the commit and the second is its
// replacement (the 32-hex-character strings look like MD5 digests) - confirm against the real file.
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("a9cf1abecf709de9f158f6ffe57b00e7"));
|
||||
Arrays.asList("27fadaf914ad7fbc83d4f8367f43bc40"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue