Emit the MLE AC and AF in the INFO field of the UG output

This commit is contained in:
Eric Banks 2012-06-18 12:19:36 -04:00
parent 677babf546
commit 82a2c40338
4 changed files with 52 additions and 37 deletions

View File

@ -262,12 +262,14 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"));
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"));
// also, check to see whether comp rods were included
if ( dbsnp.dbsnp.isBound() )
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY, 0, VCFHeaderLineType.Flag, "dbSNP Membership"));
// FORMAT and INFO fields
// FORMAT fields
headerInfo.addAll(getSupportedHeaderStrings());
// FILTER fields

View File

@ -296,6 +296,7 @@ public class UnifiedGenotyperEngine {
// determine which alternate alleles have AF>0
final List<Allele> myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
final List<Integer> alleleCountsofMLE = new ArrayList<Integer>(vc.getAlleles().size());
myAlleles.add(vc.getReference());
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
final Allele alternateAllele = vc.getAlternateAllele(i);
@ -304,11 +305,12 @@ public class UnifiedGenotyperEngine {
if ( indexOfAllele == -1 )
continue;
int indexOfBestAC = AFresult.getAlleleCountsOfMAP()[indexOfAllele-1];
final int indexOfBestAC = AFresult.getAlleleCountsOfMAP()[indexOfAllele-1];
// if the most likely AC is not 0, then this is a good alternate allele to use
if ( indexOfBestAC != 0 ) {
myAlleles.add(alternateAllele);
alleleCountsofMLE.add(AFresult.getAlleleCountsOfMLE()[indexOfAllele-1]);
bestGuessIsRef = false;
}
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
@ -355,6 +357,7 @@ public class UnifiedGenotyperEngine {
// create the genotypes
final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy);
builder.genotypes(genotypes);
// print out stats if we have a writer
if ( verboseWriter != null && !limitedContext )
@ -370,6 +373,16 @@ public class UnifiedGenotyperEngine {
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
attributes.put(NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());
// add the MLE AC and AF annotations
if ( alleleCountsofMLE.size() > 0 ) {
attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
final double AN = (double)builder.make().getCalledChrCount();
final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
for ( int AC : alleleCountsofMLE )
MLEfrequencies.add((double)AC / AN);
attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
}
if ( !UAC.NO_SLOD && !limitedContext && !bestGuessIsRef ) {
//final boolean DEBUG_SLOD = false;
@ -413,7 +426,6 @@ public class UnifiedGenotyperEngine {
}
// finish constructing the resulting VC
builder.genotypes(genotypes);
builder.attributes(attributes);
VariantContext vcCall = builder.make();

View File

@ -33,7 +33,9 @@ public final class VCFConstants {
// reserved INFO/FORMAT field keys
public static final String ANCESTRAL_ALLELE_KEY = "AA";
public static final String ALLELE_COUNT_KEY = "AC";
public static final String MLE_ALLELE_COUNT_KEY = "MLEAC";
public static final String ALLELE_FREQUENCY_KEY = "AF";
public static final String MLE_ALLELE_FREQUENCY_KEY = "MLEAF";
public static final String ALLELE_NUMBER_KEY = "AN";
public static final String RMS_BASE_QUALITY_KEY = "BQ";
public static final String CIGAR_KEY = "CIGAR";

View File

@ -8,7 +8,6 @@ import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
// ********************************************************************************** //
// Note that this class also serves as an integration test for the VariantAnnotator! //
@ -29,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("b6c677b2375541fd2db775d0029571e6"));
Arrays.asList("1c6ea045819b151bcd9d98947c5d4c4d"));
executeTest("test MultiSample Pilot1", spec);
}
@ -37,7 +36,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("3400dfae6db8ed7e1351b1aa52341714"));
Arrays.asList("c09dfbfc5b76acacb616730eaa83a150"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
}
@ -45,7 +44,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("0bb67b07ee5315d0486f3a0045a03757"));
Arrays.asList("c51d037e0b1cd0ed3a1cd6c6b29646cf"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -53,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("5c5bf3d2676e1a26d521f1f902f73526"));
Arrays.asList("0a085eac119c91d63fdd4a7e9a5e45af"));
executeTest("test SingleSample Pilot2", spec);
}
@ -61,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + testDir + "multiallelic.snps.bam -o %s -L " + testDir + "multiallelic.snps.intervals", 1,
Arrays.asList("eb6c8b7680f40b5fdac6e451c623ab81"));
Arrays.asList("bdbb67743c9f75ac60d0a10f94856361"));
executeTest("test Multiple SNP alleles", spec);
}
@ -69,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testBadRead() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + testDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
Arrays.asList("e2cf97bca4a720ca64ca7f682da6c9f0"));
Arrays.asList("bf60763a6e9c9d3987cfbac43b941a48"));
executeTest("test bad read", spec);
}
@ -77,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("0c195201574815559757885c693b6640"));
Arrays.asList("1e991a6a7288be7ac603ef6467fb1ac2"));
executeTest("test reverse trim", spec);
}
@ -87,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "6209a19a33ac9e187a9074cee549f93b";
private final static String COMPRESSED_OUTPUT_MD5 = "3136826ec99366b0285b278aba35cec1";
@Test
public void testCompressedOutput() {
@ -108,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "34cb7146c037925e8f324cffd986834d";
String md5 = "7824468b8290ffb7795a1ec3e493c1a4";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -140,7 +139,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinBaseQualityScore() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
Arrays.asList("f48e4898c741c84354da3a0562cb44e1"));
Arrays.asList("86121f5094f26c8b2e320c1f5dea4ae3"));
executeTest("test min_base_quality_score 26", spec);
}
@ -148,7 +147,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("f4ef85f1ed72e35b91b0469edf5956ad"));
Arrays.asList("fe4a96f0049edd466c030def4c62a224"));
executeTest("test SLOD", spec);
}
@ -156,7 +155,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNDA() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("ea219bdce9596e8649ad1d39e24e333a"));
Arrays.asList("ca8d0d91fd0cef93d4a606dec84a7986"));
executeTest("test NDA", spec);
}
@ -164,23 +163,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testCompTrack() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("9d5c51379e1b1031da5735aa8c965766"));
Arrays.asList("7c3518d356c05c6b9a8918357c260bfe"));
executeTest("test using comp track", spec);
}
@Test
public void testOutputParameterSitesOnly() {
testOutputParameters("-sites_only", "ac8bea16be247d9e39d66a6305409f57");
testOutputParameters("-sites_only", "fe204cef499e5aceb2732ba2e45903ad");
}
@Test
public void testOutputParameterAllConfident() {
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "1e908a3164adbab10dcb6415e2645954");
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "1ab8b68891d1531923a40d594250e8e0");
}
@Test
public void testOutputParameterAllSites() {
testOutputParameters("--output_mode EMIT_ALL_SITES", "eee23523912b51b249472e6d5fc0aece");
testOutputParameters("--output_mode EMIT_ALL_SITES", "ab179ef6ece3ab9e6b1ff5800cb89ebd");
}
private void testOutputParameters(final String args, final String md5) {
@ -194,7 +193,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("355bee3d375e994e4a3b07f7a8d267a0"));
Arrays.asList("a19c6195211b0ff036c746c7e11490ed"));
executeTest("test confidence 1", spec1);
}
@ -202,7 +201,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("72d9ea93591b17535b7f5b53e1d064cb"));
Arrays.asList("3fc3c36edaac133b4c11b20a5af915c4"));
executeTest("test confidence 2", spec2);
}
@ -213,12 +212,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testHeterozyosity1() {
testHeterozosity( 0.01, "0ffd19f90b05652e45f58e4a959ae304" );
testHeterozosity( 0.01, "82caf6c25d3aeabf7978016474e04fd0" );
}
@Test
public void testHeterozyosity2() {
testHeterozosity( 1.0 / 1850, "b6dbfb567e433273fe90b0d038556a9f" );
testHeterozosity( 1.0 / 1850, "d2a7ba1fa2d1a4153f685f3b3f6d55a2" );
}
private void testHeterozosity(final double arg, final String md5) {
@ -242,7 +241,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("c9675bc1ca6c82cb60d39d9395881c96"));
Arrays.asList("b574087efc5b259f69c429f1f415da0a"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -261,7 +260,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("6e4089986d08d46a8d0b4ddfd611a7c3"));
Arrays.asList("1b9556725b6a2cb52ad6745e9eca37e6"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -280,7 +279,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("80a5a499cc553ee579ba93dcb967e5ef"));
Arrays.asList("9388a1216957c4722fe54af06a05f242"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -295,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("9271105e630ab39cf1c88b338da54594"));
Arrays.asList("8f942000baaf522fcea29691fe5ef75d"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@ -308,7 +307,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("d77a379429ca848cea552c4697b86472"));
Arrays.asList("4b8822ccc9ac04bee37bf0c9922108f9"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -318,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("f83c4f370ed0a343ca0808e5da3d997d"));
Arrays.asList("13160041d8ebfb2080981f89e39eeb4f"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@ -328,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("ca5459e93a9955aec8f93abf7f84e5ed"));
Arrays.asList("59e874d76e42eafd98ad961eb70706bc"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@ -336,13 +335,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("04aaeff1e9f97bbf2dc2d6d754f25a0d"));
Arrays.asList("eaef9cc984a95b5ccb4c4c1f7c20c235"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("5c7db047ae9417d37c6bbda1d8ea6019"));
Arrays.asList("b4df2bf0d820c6fc11fabcafe18bb769"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@ -352,7 +351,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + testDir + vcf + " -I " + validationDataLocation +
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
Arrays.asList("3e3ac23846801c34acbf10a1a527264a"));
Arrays.asList("95226301a014347efc90e5f750a0db60"));
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
}
@ -385,7 +384,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("90e8140f114e026f2a0e7a881baa3f20"));
Arrays.asList("a3ea0eea74f2031ebb2ea0edfa14c945"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -393,7 +392,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("db70b7a015fa882c8ce1e4c43f589f22"));
Arrays.asList("a3741b9de95e5858640220d62a0d318c"));
executeTest("test minIndelFraction 0.25", spec);
}
@ -401,7 +400,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction100() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 1", 1,
Arrays.asList("50a6774b7d8f71fe0e125c204d50ba84"));
Arrays.asList("c1911f6ede7b4e8e83209ead66329596"));
executeTest("test minIndelFraction 1.0", spec);
}
}