CombineVariants no longer combines records of different types. So now when combining SNP and indel callsets, overlapping calls get their own records. Useful for Khalid in the pipeline. For those interested, it turns out the previous behavior was doing the wrong thing occasionally (and this was even captured in the integration tests).
This commit is contained in:
parent
bc8b5da698
commit
80b5c5261a
|
|
@ -172,17 +172,24 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
|
||||
return 0;
|
||||
|
||||
VariantContext mergedVC;
|
||||
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||
if ( master ) {
|
||||
mergedVC = VariantContextUtils.masterMerge(vcs, "master");
|
||||
mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master"));
|
||||
} else {
|
||||
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType,
|
||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
|
||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
||||
// iterate over the keys (and not the values) so that it's deterministic
|
||||
for ( VariantContext.Type type : VCsByType.keySet() ) {
|
||||
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
|
||||
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
}
|
||||
}
|
||||
|
||||
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
|
||||
for ( VariantContext mergedVC : mergedVCs ) {
|
||||
// only operate at the start of events
|
||||
if ( mergedVC == null )
|
||||
continue;
|
||||
|
||||
if ( mergedVC != null ) { // only operate at the start of events
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
|
||||
// re-compute chromosome counts
|
||||
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);
|
||||
|
|
|
|||
|
|
@ -492,7 +492,7 @@ public class VariantContextUtils {
|
|||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false));
|
||||
}
|
||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled
|
||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
||||
return null;
|
||||
|
||||
// establish the baseline info from the first VC
|
||||
|
|
@ -637,6 +637,17 @@ public class VariantContextUtils {
|
|||
return merged;
|
||||
}
|
||||
|
||||
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
||||
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||
mappedVCs.get(vc.getType()).add(vc);
|
||||
}
|
||||
|
||||
return mappedVCs;
|
||||
}
|
||||
|
||||
private static class AlleleMapper {
|
||||
private VariantContext vc = null;
|
||||
private Map<Allele, Allele> map = null;
|
||||
|
|
|
|||
|
|
@ -80,9 +80,9 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5b82f37df1f5ba40f0474d71c94142ec", false); }
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "cba8f749f2444d69a54553b15328ed47", false); }
|
||||
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "c58dca482bf97069eac6d9f1a07a2cba", false); }
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "78b169cf9955c9fd01340292d5ba2dca", false); }
|
||||
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); }
|
||||
|
||||
|
|
@ -100,7 +100,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
1,
|
||||
Arrays.asList("8b78339ccf7a5a5a837f79e88a3a38e5"));
|
||||
Arrays.asList("0e475c98d5152fb12eb17f3907b849a9"));
|
||||
executeTest("threeWayWithRefs", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue