Adding a new annotation to UG calls: NDA = number of discovered (but not necessarily genotyped) alleles for the site. This could help downstream analysis esp. of indels for wonky sites (since we only use the top 2-3 alleles). Not enabled by default but we can change that if this turns out to be useful.

This commit is contained in:
Eric Banks 2012-04-11 13:47:10 -04:00
parent d2142c3aa7
commit dc90508104
4 changed files with 30 additions and 12 deletions

View File

@ -82,15 +82,22 @@ public class UnifiedArgumentCollection {
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
/**
* This argument is not enabled by default because it increases the runtime by an appreciable amount.
* Note that calculating the SLOD increases the runtime by an appreciable amount.
*/
@Argument(fullName = "noSLOD", shortName = "nosl", doc = "If provided, we will not calculate the SLOD", required = false)
public boolean NO_SLOD = false;
/**
* Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping.
* Using this argument instructs the genotyper to annotate (in the INFO field) the number of alternate alleles that were originally discovered at the site.
*/
@Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false)
public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;
/**
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
*/
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when in GENOTYPE_MODE = GENOTYPE_GIVEN_ALLELES", required=false)
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
public RodBinding<VariantContext> alleles;
/**
@ -171,6 +178,7 @@ public class UnifiedArgumentCollection {
uac.GenotypingMode = GenotypingMode;
uac.OutputMode = OutputMode;
uac.NO_SLOD = NO_SLOD;
uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED;
uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;

View File

@ -249,6 +249,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// annotation (INFO) fields from UnifiedGenotyper
if ( !UAC.NO_SLOD )
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
// also, check to see whether comp rods were included

View File

@ -51,6 +51,8 @@ import java.util.*;
public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
public static final double HUMAN_SNP_HETEROZYGOSITY = 1e-3;
public static final double HUMAN_INDEL_HETEROZYGOSITY = 1e-4;
@ -365,6 +367,9 @@ public class UnifiedGenotyperEngine {
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
attributes.put(NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());
if ( !UAC.NO_SLOD && !limitedContext && !bestGuessIsRef ) {
//final boolean DEBUG_SLOD = false;

View File

@ -122,16 +122,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testCallingParameters() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "--min_base_quality_score 26", "258c1b33349eb3b2d395ec4d69302725" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1,
Arrays.asList(entry.getValue()));
executeTest(String.format("test calling parameter[%s]", entry.getKey()), spec);
}
public void testMinBaseQualityScore() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
Arrays.asList("258c1b33349eb3b2d395ec4d69302725"));
executeTest("test min_base_quality_score 26", spec);
}
@Test
@ -142,6 +137,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
executeTest("test SLOD", spec);
}
@Test
public void testNDA() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList(""));
executeTest("test NDA", spec);
}
@Test
public void testCompTrack() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(