CombineVariants no longer combines records of different types. When combining SNP and indel callsets, overlapping calls of different types now each get their own record instead of being merged into a single one. Useful for Khalid in the pipeline. For those interested, it turns out the previous behavior was occasionally doing the wrong thing (and the incorrect output was even captured in the expected results of the integration tests).

This commit is contained in:
Eric Banks 2011-07-18 13:42:45 -04:00
parent bc8b5da698
commit 80b5c5261a
3 changed files with 28 additions and 10 deletions

View File

@ -172,17 +172,24 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN)) if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
return 0; return 0;
VariantContext mergedVC; List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
if ( master ) { if ( master ) {
mergedVC = VariantContextUtils.masterMerge(vcs, "master"); mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master"));
} else { } else {
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType, Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC); // iterate over the keys (and not the values) so that it's deterministic
for ( VariantContext.Type type : VCsByType.keySet() ) {
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
}
} }
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC); for ( VariantContext mergedVC : mergedVCs ) {
// only operate at the start of events
if ( mergedVC == null )
continue;
if ( mergedVC != null ) { // only operate at the start of events
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes()); HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
// re-compute chromosome counts // re-compute chromosome counts
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false); VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);

View File

@ -492,7 +492,7 @@ public class VariantContextUtils {
if ( ! filteredAreUncalled || vc.isNotFiltered() ) if ( ! filteredAreUncalled || vc.isNotFiltered() )
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false)); VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false));
} }
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
return null; return null;
// establish the baseline info from the first VC // establish the baseline info from the first VC
@ -637,6 +637,17 @@ public class VariantContextUtils {
return merged; return merged;
} }
/**
 * Groups the given variant contexts by their type.
 *
 * The returned map iterates its keys in the order in which each type is first
 * encountered in {@code VCs}, and each value list keeps its contexts in input
 * order. Callers that walk the key set therefore see a reproducible order for
 * a given input.
 *
 * @param VCs the variant contexts to partition; must not be null
 * @return a map from each type present in {@code VCs} to the contexts of that
 *         type; empty when {@code VCs} is empty
 */
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
    // Insertion-ordered map: a plain HashMap makes no guarantee about iteration
    // order, so looping over its key set could emit merged records in a
    // different order from one run to the next.
    final Map<VariantContext.Type, List<VariantContext>> mappedVCs =
            new java.util.LinkedHashMap<VariantContext.Type, List<VariantContext>>();
    for ( VariantContext vc : VCs ) {
        final VariantContext.Type type = vc.getType();
        List<VariantContext> sameTypeVCs = mappedVCs.get(type);
        if ( sameTypeVCs == null ) {
            // first context of this type: start its bucket
            sameTypeVCs = new ArrayList<VariantContext>();
            mappedVCs.put(type, sameTypeVCs);
        }
        sameTypeVCs.add(vc);
    }
    return mappedVCs;
}
private static class AlleleMapper { private static class AlleleMapper {
private VariantContext vc = null; private VariantContext vc = null;
private Map<Allele, Allele> map = null; private Map<Allele, Allele> map = null;

View File

@ -80,9 +80,9 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5b82f37df1f5ba40f0474d71c94142ec", false); } @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "cba8f749f2444d69a54553b15328ed47", false); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "c58dca482bf97069eac6d9f1a07a2cba", false); } @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "78b169cf9955c9fd01340292d5ba2dca", false); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); } @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); }
@ -100,7 +100,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"), " -genotypeMergeOptions UNIQUIFY -L 1"),
1, 1,
Arrays.asList("8b78339ccf7a5a5a837f79e88a3a38e5")); Arrays.asList("0e475c98d5152fb12eb17f3907b849a9"));
executeTest("threeWayWithRefs", spec); executeTest("threeWayWithRefs", spec);
} }