CombineVariants no longer combines records of different types. So now when combining SNP and indel callsets, overlapping calls get their own records. Useful for Khalid in the pipeline. For those interested, it turns out the previous behavior was doing the wrong thing occasionally (and this was even captured in the integration tests).

This commit is contained in:
Eric Banks 2011-07-18 13:42:45 -04:00
parent bc8b5da698
commit 80b5c5261a
3 changed files with 28 additions and 10 deletions

View File

@ -172,17 +172,24 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
return 0;
VariantContext mergedVC;
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
if ( master ) {
mergedVC = VariantContextUtils.masterMerge(vcs, "master");
mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master"));
} else {
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType,
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
// iterate over the keys (and not the values) so that it's deterministic
for ( VariantContext.Type type : VCsByType.keySet() ) {
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
}
}
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
for ( VariantContext mergedVC : mergedVCs ) {
// only operate at the start of events
if ( mergedVC == null )
continue;
if ( mergedVC != null ) { // only operate at the start of events
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
// re-compute chromosome counts
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);

View File

@ -492,7 +492,7 @@ public class VariantContextUtils {
if ( ! filteredAreUncalled || vc.isNotFiltered() )
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false));
}
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
return null;
// establish the baseline info from the first VC
@ -637,6 +637,17 @@ public class VariantContextUtils {
return merged;
}
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
for ( VariantContext vc : VCs ) {
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(vc);
}
return mappedVCs;
}
private static class AlleleMapper {
private VariantContext vc = null;
private Map<Allele, Allele> map = null;

View File

@ -80,9 +80,9 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5b82f37df1f5ba40f0474d71c94142ec", false); }
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "cba8f749f2444d69a54553b15328ed47", false); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "c58dca482bf97069eac6d9f1a07a2cba", false); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "78b169cf9955c9fd01340292d5ba2dca", false); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); }
@ -100,7 +100,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
Arrays.asList("8b78339ccf7a5a5a837f79e88a3a38e5"));
Arrays.asList("0e475c98d5152fb12eb17f3907b849a9"));
executeTest("threeWayWithRefs", spec);
}