Merge pull request #575 from broadinstitute/eb_various_fixes_for_gvcfs
Eb various fixes for gvcfs
This commit is contained in:
commit
ce39fcd8a3
|
|
@ -292,7 +292,7 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
|
||||||
|
|
||||||
// attributes
|
// attributes
|
||||||
final Map<String, Object> attrs = new HashMap<>(1);
|
final Map<String, Object> attrs = new HashMap<>(1);
|
||||||
if ( !USE_BP_RESOLUTION )
|
if ( !USE_BP_RESOLUTION && end != start )
|
||||||
attrs.put(VCFConstants.END_KEY, Integer.toString(end));
|
attrs.put(VCFConstants.END_KEY, Integer.toString(end));
|
||||||
|
|
||||||
// genotypes
|
// genotypes
|
||||||
|
|
|
||||||
|
|
@ -121,8 +121,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
||||||
@Output(doc="File to which variants should be written")
|
@Output(doc="File to which variants should be written")
|
||||||
protected VariantContextWriter vcfWriter = null;
|
protected VariantContextWriter vcfWriter = null;
|
||||||
|
|
||||||
// TODO -- currently this option doesn't actually work; must fix
|
@Argument(fullName="includeNonVariantSites", shortName="allSites", doc="Include loci found to be non-variant after genotyping", required=false)
|
||||||
@Argument(fullName="includeNonVariants", shortName="inv", doc="Include loci found to be non-variant after the combining procedure", required=false)
|
|
||||||
public boolean INCLUDE_NON_VARIANTS = false;
|
public boolean INCLUDE_NON_VARIANTS = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -216,41 +215,68 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
||||||
result = new VariantContextBuilder(regenotypedVC).attributes(attrs).make();
|
result = new VariantContextBuilder(regenotypedVC).attributes(attrs).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
// if it turned monomorphic and we don't want such sites, quit
|
// if it turned monomorphic then we either need to ignore or fix such sites
|
||||||
if ( !INCLUDE_NON_VARIANTS && result.isMonomorphicInSamples() )
|
boolean createRefGTs = false;
|
||||||
return null;
|
if ( result.isMonomorphicInSamples() ) {
|
||||||
|
if ( !INCLUDE_NON_VARIANTS )
|
||||||
|
return null;
|
||||||
|
createRefGTs = true;
|
||||||
|
}
|
||||||
|
|
||||||
// re-annotate it
|
// re-annotate it
|
||||||
result = annotationEngine.annotateContext(tracker, ref, null, result);
|
result = annotationEngine.annotateContext(tracker, ref, null, result);
|
||||||
|
|
||||||
// fix some of the annotations
|
// fix some of the annotations
|
||||||
return new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result.getGenotypes())).make();
|
return new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result, createRefGTs)).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleans up genotype-level annotations that need to be updated.
|
* Cleans up genotype-level annotations that need to be updated.
|
||||||
* 1. move MIN_DP to DP if present
|
* 1. move MIN_DP to DP if present
|
||||||
* 2. remove SB is present
|
* 2. propagate DP to AD if not present
|
||||||
|
* 3. remove SB if present
|
||||||
*
|
*
|
||||||
* @param newGs the new Genotypes to fix
|
* @param VC the VariantContext with the Genotypes to fix
|
||||||
|
* @param createRefGTs if true we will also create proper hom ref genotypes since we assume the site is monomorphic
|
||||||
* @return a new set of Genotypes
|
* @return a new set of Genotypes
|
||||||
*/
|
*/
|
||||||
private List<Genotype> cleanupGenotypeAnnotations(final GenotypesContext newGs) {
|
private List<Genotype> cleanupGenotypeAnnotations(final VariantContext VC, final boolean createRefGTs) {
|
||||||
final List<Genotype> recoveredGs = new ArrayList<>(newGs.size());
|
final GenotypesContext oldGTs = VC.getGenotypes();
|
||||||
for ( final Genotype newG : newGs ) {
|
final List<Genotype> recoveredGs = new ArrayList<>(oldGTs.size());
|
||||||
final Map<String, Object> attrs = new HashMap<>(newG.getExtendedAttributes());
|
for ( final Genotype oldGT : oldGTs ) {
|
||||||
|
final Map<String, Object> attrs = new HashMap<>(oldGT.getExtendedAttributes());
|
||||||
|
|
||||||
final GenotypeBuilder builder = new GenotypeBuilder(newG);
|
final GenotypeBuilder builder = new GenotypeBuilder(oldGT);
|
||||||
|
int depth = oldGT.hasDP() ? oldGT.getDP() : 0;
|
||||||
|
|
||||||
// move the MIN_DP to DP
|
// move the MIN_DP to DP
|
||||||
if ( newG.hasExtendedAttribute("MIN_DP") ) {
|
if ( oldGT.hasExtendedAttribute("MIN_DP") ) {
|
||||||
builder.DP(newG.getAttributeAsInt("MIN_DP", 0));
|
depth = oldGT.getAttributeAsInt("MIN_DP", 0);
|
||||||
|
builder.DP(depth);
|
||||||
attrs.remove("MIN_DP");
|
attrs.remove("MIN_DP");
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove SB
|
// remove SB
|
||||||
attrs.remove("SB");
|
attrs.remove("SB");
|
||||||
|
|
||||||
|
// create AD if it's not there
|
||||||
|
if ( !oldGT.hasAD() && VC.isVariant() ) {
|
||||||
|
final int[] AD = new int[VC.getNAlleles()];
|
||||||
|
AD[0] = depth;
|
||||||
|
builder.AD(AD);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( createRefGTs ) {
|
||||||
|
final int ploidy = oldGT.getPloidy();
|
||||||
|
final List<Allele> refAlleles = new ArrayList<>(ploidy);
|
||||||
|
for ( int i = 0; i < ploidy; i++ )
|
||||||
|
refAlleles.add(VC.getReference());
|
||||||
|
builder.alleles(refAlleles);
|
||||||
|
|
||||||
|
// also, the PLs are technically no longer usable
|
||||||
|
builder.noPL();
|
||||||
|
}
|
||||||
|
|
||||||
recoveredGs.add(builder.noAttributes().attributes(attrs).make());
|
recoveredGs.add(builder.noAttributes().attributes(attrs).make());
|
||||||
}
|
}
|
||||||
return recoveredGs;
|
return recoveredGs;
|
||||||
|
|
|
||||||
|
|
@ -161,16 +161,24 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testMD5s() throws Exception {
|
public void testMD5s() throws Exception {
|
||||||
final String cmd = baseTestString(" -L 1:69485-69791");
|
final String cmd = baseTestString(" -L 1:69485-69791");
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("aecdfa9eb32b802cd629e9f811ef15fd"));
|
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c279e089fd15359e75867b1318cb4d50"));
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testMD5s", spec);
|
executeTest("testMD5s", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBasepairResolution() throws Exception {
|
public void testBasepairResolutionOutput() throws Exception {
|
||||||
final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution");
|
final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution");
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("a068fb2c35cdd14df1e8f1f92f4114b4"));
|
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("a068fb2c35cdd14df1e8f1f92f4114b4"));
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testBasepairResolution", spec);
|
executeTest("testBasepairResolutionOutput", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBasepairResolutionInput() throws Exception {
|
||||||
|
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + privateTestDir + "gvcf.basepairResolution.vcf";
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("0bd914cfa16349ee0439bfa5033a4f2a"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testBasepairResolutionInput", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -204,4 +204,17 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
Arrays.asList("f8c014d0af7e014475a2a448dc1f9cef"));
|
Arrays.asList("f8c014d0af7e014475a2a448dc1f9cef"));
|
||||||
cvExecuteTest("combineLeavesUnfilteredRecordsUnfiltered: ", spec, false);
|
cvExecuteTest("combineLeavesUnfilteredRecordsUnfiltered: ", spec, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void combiningGVCFsFails() {
|
||||||
|
try {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T CombineVariants --no_cmdline_in_header -o %s "
|
||||||
|
+ " -R " + b37KGReference
|
||||||
|
+ " -V " + privateTestDir + "gvcfExample1.vcf",
|
||||||
|
1,
|
||||||
|
Arrays.asList("FAILFAILFAILFAILFAILFAILFAILFAIL"));
|
||||||
|
executeTest("combiningGVCFsFails", spec);
|
||||||
|
} catch (Exception e) { } // do nothing
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -65,19 +65,19 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
||||||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("2be5f6f7e7f79841108906555d548683"));
|
Arrays.asList("af58920a965229526aa6961f0d30b035"));
|
||||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false) // TODO -- reenable when this option works
|
@Test(enabled = true)
|
||||||
public void combineSingleSamplePipelineGVCF_includeNonVariants() {
|
public void combineSingleSamplePipelineGVCF_includeNonVariants() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString(" -V:sample1 " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
|
baseTestString(" -V:sample1 " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
|
||||||
" -V:sample2 " + privateTestDir + "combine.single.sample.pipeline.2.vcf" +
|
" -V:sample2 " + privateTestDir + "combine.single.sample.pipeline.2.vcf" +
|
||||||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||||
" -inv -L 20:10,000,000-10,010,000", b37KGReference),
|
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("de957075796512cb9f333f77515e97d5"));
|
Arrays.asList("764c8d62b4128496471f49cda029b5da"));
|
||||||
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
|
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -89,7 +89,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
||||||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||||
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
|
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("e3c7452277898fece54bf60af9588666"));
|
Arrays.asList("67120b6784e91e737c37a1d0b3d53bc4"));
|
||||||
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
|
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -110,7 +110,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
||||||
" -V " + privateTestDir + "gvcfExample1.vcf" +
|
" -V " + privateTestDir + "gvcfExample1.vcf" +
|
||||||
" -V " + privateTestDir + "gvcfExample2.vcf",
|
" -V " + privateTestDir + "gvcfExample2.vcf",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("67410d8ac490e3c9d19ba7a4cceaf8fb"));
|
Arrays.asList("24401bb1011bf6cd452e428c6ad5852d"));
|
||||||
executeTest("testSamplesWithDifferentLs", spec);
|
executeTest("testSamplesWithDifferentLs", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -326,6 +326,9 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
||||||
if ( mergedVC == null )
|
if ( mergedVC == null )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if ( mergedVC.hasAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) )
|
||||||
|
throw new UserException("CombineVariants should not be used to merge gVCFs produced by the HaplotypeCaller; use CombineGVCFs instead");
|
||||||
|
|
||||||
final VariantContextBuilder builder = new VariantContextBuilder(mergedVC);
|
final VariantContextBuilder builder = new VariantContextBuilder(mergedVC);
|
||||||
// re-compute chromosome counts
|
// re-compute chromosome counts
|
||||||
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
||||||
|
|
|
||||||
|
|
@ -1921,10 +1921,6 @@ public class GATKVariantContextUtils {
|
||||||
final HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<>();
|
final HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<>();
|
||||||
for ( final VariantContext vc : VCs ) {
|
for ( final VariantContext vc : VCs ) {
|
||||||
VariantContext.Type vcType = vc.getType();
|
VariantContext.Type vcType = vc.getType();
|
||||||
if( vc.hasAllele(NON_REF_SYMBOLIC_ALLELE) ) {
|
|
||||||
if( vc.getAlternateAlleles().size() > 1 ) { throw new IllegalStateException("Reference records should not have more than one alternate allele"); }
|
|
||||||
vcType = VariantContext.Type.NO_VARIATION;
|
|
||||||
}
|
|
||||||
|
|
||||||
// look at previous variant contexts of different type. If:
|
// look at previous variant contexts of different type. If:
|
||||||
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
|
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue