diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 6cabbd34d..f03140c51 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -42,7 +42,7 @@ import java.util.List; import java.util.Arrays; -public class AlleleBalance implements InfoFieldAnnotation, StandardAnnotation { +public class AlleleBalance implements InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index fec480ca5..0638afb59 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFFormatHeaderLine; @@ -13,64 +12,54 @@ import org.broadinstitute.sting.utils.pileup.*; import java.util.*; -public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalAnnotation { +public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) { if ( g == null || !g.isCalled() ) return null; - if ( vc.isSNP() ) { - return annotateSNP(stratifiedContext, vc); - } else if ( vc.isIndel() ) { - return annotateIndel(stratifiedContext, vc); - } else { + if ( !vc.isBiallelic() ) return null; - } + + if ( vc.isSNP() ) + return annotateSNP(stratifiedContext, vc); + if ( vc.isIndel() ) + return annotateIndel(stratifiedContext, vc); + + return null; } private Map annotateSNP(StratifiedAlignmentContext stratifiedContext, VariantContext vc) { - Set altAlleles = vc.getAlternateAlleles(); - if ( altAlleles.size() == 0 ) - return null; - HashMap alleleCounts = new HashMap(); - for ( Allele allele : altAlleles ) - alleleCounts.put(allele.getBases()[0], 0); + alleleCounts.put(vc.getReference().getBases()[0], 0); + alleleCounts.put(vc.getAlternateAllele(0).getBases()[0], 0); ReadBackedPileup pileup = stratifiedContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(); - for (PileupElement p : pileup ) { + for ( PileupElement p : pileup ) { if ( alleleCounts.containsKey(p.getBase()) ) alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1); } - StringBuffer sb = new StringBuffer(); // we need to add counts in the correct order - for ( Allele allele : vc.getAlleles() ) { - if ( alleleCounts.containsKey(allele.getBases()[0]) ) { - if ( sb.length() > 0 ) - sb.append(","); - sb.append(String.format("%d", alleleCounts.get(allele.getBases()[0]))); - } - } + Integer[] counts = new Integer[2]; + counts[0] = alleleCounts.get(vc.getReference().getBases()[0]); + counts[1] = alleleCounts.get(vc.getAlternateAllele(0).getBases()[0]); Map map = new HashMap(); - map.put(getKeyNames().get(0), sb.toString()); + map.put(getKeyNames().get(0), counts); return map; } private Map annotateIndel(StratifiedAlignmentContext stratifiedContext, VariantContext vc) { ReadBackedExtendedEventPileup pileup = stratifiedContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup(); - if ( pileup == null ) { + if ( pileup == null ) return null; - } - // get identities and lengths for indel events - // TODO -- Insertions and deletions represented at the same locus - HashMap countsBySize = new HashMap(); - for ( Allele al : vc.getAlternateAlleles() ) { - countsBySize.put(al.length(),0); - } + Integer[] counts = new Integer[2]; + + // TODO -- fix me + /* for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) { if ( countsBySize.keySet().contains(e.getEventLength()) ) { // if proper length if ( e.isDeletion() && vc.isDeletion() || e.isInsertion() && vc.isInsertion() ) { @@ -78,23 +67,14 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalA } } } - - StringBuffer sb = new StringBuffer(); - char type = vc.isDeletion() ? 'D' : 'I'; - - for ( int len : countsBySize.keySet() ) { - if ( sb.length() > 0 ) { - sb.append(','); - } - sb.append(String.format("%d%s%d",len,type,countsBySize.get(len))); - } + */ Map map = new HashMap(); - map.put(getKeyNames().get(0),sb.toString()); + map.put(getKeyNames().get(0), counts); return map; } public List getKeyNames() { return Arrays.asList("AD"); } - public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), -1, VCFHeaderLineType.Integer, "Depth in genotypes for each ALT allele, in the same order as listed")); } + public List getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), 2, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles; currently only works for bi-allelic sites")); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index 61c540e41..d3e4b7861 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -315,10 +315,6 @@ public class VCFWriter { VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key); if ( metaData != null ) { - VCFHeaderLineType formatType = metaData.getType(); - if ( !(val instanceof String) ) - val = formatType.convert(String.valueOf(val), VCFCompoundHeaderLine.SupportedHeaderLineType.FORMAT); - int numInFormatField = metaData.getCount(); if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) { // If we have a missing field but multiple values are expected, we need to construct a new string with all fields. @@ -340,7 +336,7 @@ public class VCFWriter { // strip off trailing missing values for (int i = attrs.size()-1; i >= 0; i--) { - if ( attrs.get(i).equals(VCFConstants.MISSING_VALUE_v4) ) + if ( isMissingValue(attrs.get(i)) ) attrs.remove(i); else break; @@ -353,6 +349,11 @@ public class VCFWriter { } } + private boolean isMissingValue(String s) { + // we need to deal with the case that it's a list of missing values + return (MathUtils.countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + MathUtils.countOccurrences(',', s) == s.length()); + } + private void writeAllele(Allele allele, Map alleleMap) throws IOException { String encoding = alleleMap.get(allele); if ( encoding == null ) @@ -369,13 +370,15 @@ public class VCFWriter { else if ( val instanceof Boolean ) result = (Boolean)val ? "" : null; // empty string for true, null for false else if ( val instanceof List ) { - List list = (List)val; - if ( list.size() == 0 ) + result = formatVCFField(((List)val).toArray()); + } else if ( val instanceof Object[] ) { + Object[] array = (Object[])val; + if ( array.length == 0 ) return formatVCFField(null); - StringBuffer sb = new StringBuffer(formatVCFField(list.get(0))); - for ( int i = 1; i < list.size(); i++) { + StringBuffer sb = new StringBuffer(formatVCFField(array[0])); + for ( int i = 1; i < array.length; i++) { sb.append(","); - sb.append(formatVCFField(list.get(i))); + sb.append(formatVCFField(array[i])); } result = sb.toString(); } else diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 801da7cb3..3763242d7 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("674541eb78fa6e4f9bee172b3f34bbab")); + Arrays.asList("2f2afe80e52542ac8393526061913040")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("985c55e3f0c41082dc56f7a291ef307a")); + Arrays.asList("cd05490993b1b3aaddbe75a997942ea0")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("c789e8f795cf4b182f717423bf3328f2")); + Arrays.asList("8ce6560869906e7b179bc20349d69892")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("be18a3b9589ea60350fbaf8f7e1dd769")); + Arrays.asList("423b382952fdd8c85b6ed9c0b8cdd172")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testOverwritingHeader() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, - Arrays.asList("87d124ddfee8537e2052c495777d2b3b")); + Arrays.asList("d3b1d7a271a46228b9a67841969d7055")); executeTest("test overwriting header", spec); } @@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoReads() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("24234da54855c892625008fb134e3a88")); + Arrays.asList("e40a69279c30ddf1273f33eefa8da0cd")); executeTest("not passing it any reads", spec); } @@ -95,7 +95,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("24d9649943be876e78f76bbf9ff5b501")); + Arrays.asList("fa9ad0cf345d381eafeea750467fc18e")); executeTest("getting DB tag with dbSNP", spec); } @@ -103,7 +103,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B compH3,VCF," + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, - Arrays.asList("77980e4f741c09d88f7a91faf86037c6")); + Arrays.asList("5aa6b5c3ce7d9e107269af7e20a20167")); executeTest("getting DB tag with HM3", spec); } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 8d6a402f2..6bd1c8bab 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("0c1a5f6372f6d17544a95292bd313c86")); + Arrays.asList("680292498be02796787bc4b2393a003c")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1, - Arrays.asList("049c98bc6db7029995f8893f570dd9ad")); + Arrays.asList("1bf2dc92f97f7bd661e61453b657477a")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("6c278b09727cd14523423a6beda627a5")); + Arrays.asList("6cab256a3c984c8938ffe026ed620c36")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } @@ -63,7 +63,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testParallelization() { - String md5 = "c66c55363fb9e951569667e3ff6eb728"; + String md5 = "8f464769be0920ee2bdfd72da2193161"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1, @@ -90,11 +90,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testParameter() { HashMap e = new HashMap(); - e.put( "-genotype", "dfe1a220ee0966a20af4a2fca9e635fe" ); - e.put( "-all_bases", "e7b766485dd37631b3fcf9f655588456" ); - e.put( "--min_base_quality_score 26", "e4e318853e091e63166eaef648ec26ac" ); - e.put( "--min_mapping_quality_score 26", "57fffb32a3a7a736f6c30ae08ef71c91" ); - e.put( "--max_mismatches_in_40bp_window 5", "4f7a5c22247234c4ee3ca2cfbbc16729" ); + e.put( "-genotype", "e4735f7d20f8d82d359f89824cfafbad" ); + e.put( "-all_bases", "c41acfac9f7908609a8d0ccdcefbd9e4" ); + e.put( "--min_base_quality_score 26", "6fea507d70c1b7b74fb6b553bd8904c2" ); + e.put( "--min_mapping_quality_score 26", "f8b1a0449f0a49c1c5edb70f4e90b5f8" ); + e.put( "--max_mismatches_in_40bp_window 5", "bf29fc94431f7b18fd4c65025294de3c" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -108,12 +108,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("8b4ebcbf330340a437e572a73a99227b")); + Arrays.asList("065be668bbea5342c7700017e131eb2a")); executeTest("testConfidence1", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("0d8825a98a8918bed2c02ac44e15dde7")); + Arrays.asList("b58578df1d05926fb7ecd9fecffa9c5e")); executeTest("testConfidence2", spec2); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index b6614f44e..880e1d4cd 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -66,12 +66,12 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "38b7e64b91c726867a604cf95b9cb10a"); } // official project VCF files in tabix format @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "4c667935099544a1863e70ae88ddd685"); } - @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cbd061cf35e0ffc48621111a602f3871"); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "17c8468b1b963c9abc49dff17fd811ba"); } @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "f3fce9ae729548e7e7c378a8282df235"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f2a2854f26734c8c3412f842a245fd34"); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "7fce3e9539ce4eb0a839d9236c928dfc"); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "a66a799b1ae9fd09b40f78af6ef538d8"); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "528cff763b95b82717008d2c05e8c393"); } @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "a9126d1cbe1fdf741236763fb3e3461f"); } @@ -86,7 +86,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("37c66ca16fb5f890bcaa8e226a5f2e61")); + Arrays.asList("daf2c43b629c9fc8d5f064e05bbc51b7")); executeTest("threeWayWithRefs", spec); }