1) AlleleBalance is no longer a standard annotation, but the Allelic Depth (AD) is for each sample.

2) Small fixes in the VCFWriter:
a) Trailing missing values weren't being removed if their count was > 1 (e.g. ".,.")
b) We were handling key values that were Lists, but not Arrays.  We now handle both.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3956 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-08-06 12:05:14 +00:00
parent c68625f055
commit 341e752c6c
6 changed files with 61 additions and 78 deletions

View File

@ -42,7 +42,7 @@ import java.util.List;
import java.util.Arrays;
public class AlleleBalance implements InfoFieldAnnotation, StandardAnnotation {
public class AlleleBalance implements InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFFormatHeaderLine;
@ -13,64 +12,54 @@ import org.broadinstitute.sting.utils.pileup.*;
import java.util.*;
public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalAnnotation {
public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() )
return null;
if ( vc.isSNP() ) {
return annotateSNP(stratifiedContext, vc);
} else if ( vc.isIndel() ) {
return annotateIndel(stratifiedContext, vc);
} else {
if ( !vc.isBiallelic() )
return null;
}
if ( vc.isSNP() )
return annotateSNP(stratifiedContext, vc);
if ( vc.isIndel() )
return annotateIndel(stratifiedContext, vc);
return null;
}
private Map<String,Object> annotateSNP(StratifiedAlignmentContext stratifiedContext, VariantContext vc) {
Set<Allele> altAlleles = vc.getAlternateAlleles();
if ( altAlleles.size() == 0 )
return null;
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : altAlleles )
alleleCounts.put(allele.getBases()[0], 0);
alleleCounts.put(vc.getReference().getBases()[0], 0);
alleleCounts.put(vc.getAlternateAllele(0).getBases()[0], 0);
ReadBackedPileup pileup = stratifiedContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
for (PileupElement p : pileup ) {
for ( PileupElement p : pileup ) {
if ( alleleCounts.containsKey(p.getBase()) )
alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
}
StringBuffer sb = new StringBuffer();
// we need to add counts in the correct order
for ( Allele allele : vc.getAlleles() ) {
if ( alleleCounts.containsKey(allele.getBases()[0]) ) {
if ( sb.length() > 0 )
sb.append(",");
sb.append(String.format("%d", alleleCounts.get(allele.getBases()[0])));
}
}
Integer[] counts = new Integer[2];
counts[0] = alleleCounts.get(vc.getReference().getBases()[0]);
counts[1] = alleleCounts.get(vc.getAlternateAllele(0).getBases()[0]);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), sb.toString());
map.put(getKeyNames().get(0), counts);
return map;
}
private Map<String,Object> annotateIndel(StratifiedAlignmentContext stratifiedContext, VariantContext vc) {
ReadBackedExtendedEventPileup pileup = stratifiedContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup();
if ( pileup == null ) {
if ( pileup == null )
return null;
}
// get identities and lengths for indel events
// TODO -- Insertions and deletions represented at the same locus
HashMap<Integer,Integer> countsBySize = new HashMap<Integer,Integer>();
for ( Allele al : vc.getAlternateAlleles() ) {
countsBySize.put(al.length(),0);
}
Integer[] counts = new Integer[2];
// TODO -- fix me
/*
for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) {
if ( countsBySize.keySet().contains(e.getEventLength()) ) { // if proper length
if ( e.isDeletion() && vc.isDeletion() || e.isInsertion() && vc.isInsertion() ) {
@ -78,23 +67,14 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalA
}
}
}
StringBuffer sb = new StringBuffer();
char type = vc.isDeletion() ? 'D' : 'I';
for ( int len : countsBySize.keySet() ) {
if ( sb.length() > 0 ) {
sb.append(',');
}
sb.append(String.format("%d%s%d",len,type,countsBySize.get(len)));
}
*/
Map<String,Object> map = new HashMap<String,Object>();
map.put(getKeyNames().get(0),sb.toString());
map.put(getKeyNames().get(0), counts);
return map;
}
public List<String> getKeyNames() { return Arrays.asList("AD"); }
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), -1, VCFHeaderLineType.Integer, "Depth in genotypes for each ALT allele, in the same order as listed")); }
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), 2, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles; currently only works for bi-allelic sites")); }
}

View File

@ -315,10 +315,6 @@ public class VCFWriter {
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
if ( metaData != null ) {
VCFHeaderLineType formatType = metaData.getType();
if ( !(val instanceof String) )
val = formatType.convert(String.valueOf(val), VCFCompoundHeaderLine.SupportedHeaderLineType.FORMAT);
int numInFormatField = metaData.getCount();
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
@ -340,7 +336,7 @@ public class VCFWriter {
// strip off trailing missing values
for (int i = attrs.size()-1; i >= 0; i--) {
if ( attrs.get(i).equals(VCFConstants.MISSING_VALUE_v4) )
if ( isMissingValue(attrs.get(i)) )
attrs.remove(i);
else
break;
@ -353,6 +349,11 @@ public class VCFWriter {
}
}
private boolean isMissingValue(String s) {
// we need to deal with the case that it's a list of missing values
return (MathUtils.countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + MathUtils.countOccurrences(',', s) == s.length());
}
private void writeAllele(Allele allele, Map<Allele, String> alleleMap) throws IOException {
String encoding = alleleMap.get(allele);
if ( encoding == null )
@ -369,13 +370,15 @@ public class VCFWriter {
else if ( val instanceof Boolean )
result = (Boolean)val ? "" : null; // empty string for true, null for false
else if ( val instanceof List ) {
List list = (List)val;
if ( list.size() == 0 )
result = formatVCFField(((List)val).toArray());
} else if ( val instanceof Object[] ) {
Object[] array = (Object[])val;
if ( array.length == 0 )
return formatVCFField(null);
StringBuffer sb = new StringBuffer(formatVCFField(list.get(0)));
for ( int i = 1; i < list.size(); i++) {
StringBuffer sb = new StringBuffer(formatVCFField(array[0]));
for ( int i = 1; i < array.length; i++) {
sb.append(",");
sb.append(formatVCFField(list.get(i)));
sb.append(formatVCFField(array[i]));
}
result = sb.toString();
} else

View File

@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("674541eb78fa6e4f9bee172b3f34bbab"));
Arrays.asList("2f2afe80e52542ac8393526061913040"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("985c55e3f0c41082dc56f7a291ef307a"));
Arrays.asList("cd05490993b1b3aaddbe75a997942ea0"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("c789e8f795cf4b182f717423bf3328f2"));
Arrays.asList("8ce6560869906e7b179bc20349d69892"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("be18a3b9589ea60350fbaf8f7e1dd769"));
Arrays.asList("423b382952fdd8c85b6ed9c0b8cdd172"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("87d124ddfee8537e2052c495777d2b3b"));
Arrays.asList("d3b1d7a271a46228b9a67841969d7055"));
executeTest("test overwriting header", spec);
}
@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("24234da54855c892625008fb134e3a88"));
Arrays.asList("e40a69279c30ddf1273f33eefa8da0cd"));
executeTest("not passing it any reads", spec);
}
@ -95,7 +95,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("24d9649943be876e78f76bbf9ff5b501"));
Arrays.asList("fa9ad0cf345d381eafeea750467fc18e"));
executeTest("getting DB tag with dbSNP", spec);
}
@ -103,7 +103,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B compH3,VCF," + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("77980e4f741c09d88f7a91faf86037c6"));
Arrays.asList("5aa6b5c3ce7d9e107269af7e20a20167"));
executeTest("getting DB tag with HM3", spec);
}
}

View File

@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("0c1a5f6372f6d17544a95292bd313c86"));
Arrays.asList("680292498be02796787bc4b2393a003c"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1,
Arrays.asList("049c98bc6db7029995f8893f570dd9ad"));
Arrays.asList("1bf2dc92f97f7bd661e61453b657477a"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("6c278b09727cd14523423a6beda627a5"));
Arrays.asList("6cab256a3c984c8938ffe026ed620c36"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}
@ -63,7 +63,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParallelization() {
String md5 = "c66c55363fb9e951569667e3ff6eb728";
String md5 = "8f464769be0920ee2bdfd72da2193161";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
@ -90,11 +90,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "dfe1a220ee0966a20af4a2fca9e635fe" );
e.put( "-all_bases", "e7b766485dd37631b3fcf9f655588456" );
e.put( "--min_base_quality_score 26", "e4e318853e091e63166eaef648ec26ac" );
e.put( "--min_mapping_quality_score 26", "57fffb32a3a7a736f6c30ae08ef71c91" );
e.put( "--max_mismatches_in_40bp_window 5", "4f7a5c22247234c4ee3ca2cfbbc16729" );
e.put( "-genotype", "e4735f7d20f8d82d359f89824cfafbad" );
e.put( "-all_bases", "c41acfac9f7908609a8d0ccdcefbd9e4" );
e.put( "--min_base_quality_score 26", "6fea507d70c1b7b74fb6b553bd8904c2" );
e.put( "--min_mapping_quality_score 26", "f8b1a0449f0a49c1c5edb70f4e90b5f8" );
e.put( "--max_mismatches_in_40bp_window 5", "bf29fc94431f7b18fd4c65025294de3c" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -108,12 +108,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("8b4ebcbf330340a437e572a73a99227b"));
Arrays.asList("065be668bbea5342c7700017e131eb2a"));
executeTest("testConfidence1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("0d8825a98a8918bed2c02ac44e15dde7"));
Arrays.asList("b58578df1d05926fb7ecd9fecffa9c5e"));
executeTest("testConfidence2", spec2);
}

View File

@ -66,12 +66,12 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "38b7e64b91c726867a604cf95b9cb10a"); } // official project VCF files in tabix format
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "4c667935099544a1863e70ae88ddd685"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cbd061cf35e0ffc48621111a602f3871"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "17c8468b1b963c9abc49dff17fd811ba"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "f3fce9ae729548e7e7c378a8282df235"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f2a2854f26734c8c3412f842a245fd34"); }
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "7fce3e9539ce4eb0a839d9236c928dfc"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "a66a799b1ae9fd09b40f78af6ef538d8"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "528cff763b95b82717008d2c05e8c393"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "a9126d1cbe1fdf741236763fb3e3461f"); }
@ -86,7 +86,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
Arrays.asList("37c66ca16fb5f890bcaa8e226a5f2e61"));
Arrays.asList("daf2c43b629c9fc8d5f064e05bbc51b7"));
executeTest("threeWayWithRefs", spec);
}