Updating UnifiedGenotyper to use the new rod binding system.
This commit is contained in:
parent
79c86e211f
commit
c7b9a9ef0a
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -35,6 +36,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -321,7 +321,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
haplotypeMap.clear();
|
haplotypeMap.clear();
|
||||||
|
|
||||||
if (getAlleleListFromVCF) {
|
if (getAlleleListFromVCF) {
|
||||||
for( final VariantContext vc_input : tracker.getValues(VariantContext.class, "alleles") ) {
|
for( final VariantContext vc_input : tracker.getValues(UAC.alleles) ) {
|
||||||
if( vc_input != null &&
|
if( vc_input != null &&
|
||||||
allowableTypes.contains(vc_input.getType()) &&
|
allowableTypes.contains(vc_input.getType()) &&
|
||||||
ref.getLocus().getStart() == vc_input.getStart()) {
|
ref.getLocus().getStart() == vc_input.getStart()) {
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -57,13 +58,13 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static VariantContext getSNPVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, boolean requireSNP, Logger logger) {
|
public static VariantContext getSNPVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
|
||||||
if ( tracker == null || ref == null || logger == null )
|
if ( tracker == null || ref == null || logger == null )
|
||||||
throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger);
|
throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger);
|
||||||
VariantContext vc = null;
|
VariantContext vc = null;
|
||||||
|
|
||||||
// search for usable record
|
// search for usable record
|
||||||
for( final VariantContext vc_input : tracker.getValues(VariantContext.class, "alleles", ref.getLocus()) ) {
|
for( final VariantContext vc_input : tracker.getValues(allelesBinding) ) {
|
||||||
if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) {
|
if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) {
|
||||||
if ( vc == null ) {
|
if ( vc == null ) {
|
||||||
vc = vc_input;
|
vc = vc_input;
|
||||||
|
|
@ -95,7 +96,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
if ( alternateAlleleToUse != null ) {
|
if ( alternateAlleleToUse != null ) {
|
||||||
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
|
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
|
||||||
} else if ( useAlleleFromVCF ) {
|
} else if ( useAlleleFromVCF ) {
|
||||||
VariantContext vc = getSNPVCFromAllelesRod(tracker, ref, true, logger);
|
VariantContext vc = getSNPVCFromAllelesRod(tracker, ref, true, logger, UAC.alleles);
|
||||||
|
|
||||||
// ignore places where we don't have a variant
|
// ignore places where we don't have a variant
|
||||||
if ( vc == null )
|
if ( vc == null )
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Hidden;
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
|
|
@ -61,6 +64,11 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "computeSLOD", shortName = "sl", doc = "If provided, we will calculate the SLOD", required = false)
|
@Argument(fullName = "computeSLOD", shortName = "sl", doc = "If provided, we will calculate the SLOD", required = false)
|
||||||
public boolean COMPUTE_SLOD = false;
|
public boolean COMPUTE_SLOD = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
|
||||||
|
*/
|
||||||
|
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when in GENOTYPE_MODE = GENOTYPE_GIVEN_ALLELES", required=false)
|
||||||
|
public RodBinding<VariantContext> alleles;
|
||||||
|
|
||||||
// control the error modes
|
// control the error modes
|
||||||
@Hidden
|
@Hidden
|
||||||
|
|
@ -168,6 +176,7 @@ public class UnifiedArgumentCollection {
|
||||||
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
|
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
|
||||||
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
|
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
|
||||||
uac.DO_CONTEXT_DEPENDENT_PENALTIES = DO_CONTEXT_DEPENDENT_PENALTIES;
|
uac.DO_CONTEXT_DEPENDENT_PENALTIES = DO_CONTEXT_DEPENDENT_PENALTIES;
|
||||||
|
uac.alleles = alleles;
|
||||||
|
|
||||||
uac.GET_GAP_PENALTIES_FROM_DATA = GET_GAP_PENALTIES_FROM_DATA;
|
uac.GET_GAP_PENALTIES_FROM_DATA = GET_GAP_PENALTIES_FROM_DATA;
|
||||||
uac.INDEL_RECAL_FILE = INDEL_RECAL_FILE;
|
uac.INDEL_RECAL_FILE = INDEL_RECAL_FILE;
|
||||||
|
|
|
||||||
|
|
@ -25,10 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
|
||||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
|
@ -69,9 +66,35 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
*/
|
*/
|
||||||
@ArgumentCollection protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
@ArgumentCollection protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
||||||
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
|
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
|
||||||
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return null; }
|
|
||||||
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
|
/**
|
||||||
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
|
* The INFO field will be annotated with information on the most biologically-significant effect
|
||||||
|
* listed in the SnpEff output file for each variant.
|
||||||
|
*/
|
||||||
|
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false)
|
||||||
|
public RodBinding<SnpEffFeature> snpEffFile;
|
||||||
|
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return snpEffFile; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If a record in the 'variant' track overlaps with a record from the provided comp track, the INFO field will be annotated
|
||||||
|
* as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field). Records that are filtered in the comp track will be ignored.
|
||||||
|
* Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
|
||||||
|
*/
|
||||||
|
@Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
|
||||||
|
public List<RodBinding<VariantContext>> comps = Collections.emptyList();
|
||||||
|
public List<RodBinding<VariantContext>> getCompRodBindings() { return comps; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An external resource VCF file or files from which to annotate.
|
||||||
|
*
|
||||||
|
* One can add annotations from one of the resource VCFs to the output.
|
||||||
|
* For example, if you want to annotate your 'variant' VCF with the AC field value from the rod bound to 'resource',
|
||||||
|
* you can specify '-E resource.AC' and records in the output VCF will be annotated with 'resource.AC=N' when a record exists in that rod at the given position.
|
||||||
|
* If multiple records in the rod overlap the given position, one is chosen arbitrarily.
|
||||||
|
*/
|
||||||
|
@Input(fullName="resource", shortName = "resource", doc="external resource VCF file", required=false)
|
||||||
|
public List<RodBinding<VariantContext>> resources = Collections.emptyList();
|
||||||
|
public List<RodBinding<VariantContext>> getResourceRodBindings() { return resources; }
|
||||||
|
|
||||||
// control the output
|
// control the output
|
||||||
@Output(doc="File to which variants should be written",required=true)
|
@Output(doc="File to which variants should be written",required=true)
|
||||||
|
|
|
||||||
|
|
@ -58,6 +58,7 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
// the unified argument collection
|
// the unified argument collection
|
||||||
private final UnifiedArgumentCollection UAC;
|
private final UnifiedArgumentCollection UAC;
|
||||||
|
public UnifiedArgumentCollection getUAC() { return UAC; }
|
||||||
|
|
||||||
// the annotation engine
|
// the annotation engine
|
||||||
private final VariantAnnotatorEngine annotationEngine;
|
private final VariantAnnotatorEngine annotationEngine;
|
||||||
|
|
@ -232,7 +233,7 @@ public class UnifiedGenotyperEngine {
|
||||||
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||||
VariantContext vc;
|
VariantContext vc;
|
||||||
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||||
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger);
|
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger, UAC.alleles);
|
||||||
if ( vcInput == null )
|
if ( vcInput == null )
|
||||||
return null;
|
return null;
|
||||||
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles());
|
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles());
|
||||||
|
|
@ -630,7 +631,7 @@ public class UnifiedGenotyperEngine {
|
||||||
// no extended event pileup
|
// no extended event pileup
|
||||||
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
||||||
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
||||||
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, refContext, false, logger);
|
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, refContext, false, logger, UAC.alleles);
|
||||||
if (vcInput == null)
|
if (vcInput == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
GenomeAnalysisEngine.resetRandomGenerator();
|
GenomeAnalysisEngine.resetRandomGenerator();
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
|
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
|
||||||
Arrays.asList(md5));
|
Arrays.asList(md5));
|
||||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec2);
|
executeTest("test MultiSample Pilot2 with alleles passed in", spec2);
|
||||||
}
|
}
|
||||||
|
|
@ -53,12 +53,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testWithAllelesPassedIn() {
|
public void testWithAllelesPassedIn() {
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||||
Arrays.asList("811ddc0bd8322b14f14f58df8c627aa9"));
|
Arrays.asList("811ddc0bd8322b14f14f58df8c627aa9"));
|
||||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||||
Arrays.asList("5cf08dd7ac3d218082f7be3915ce0b15"));
|
Arrays.asList("5cf08dd7ac3d218082f7be3915ce0b15"));
|
||||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||||
}
|
}
|
||||||
|
|
@ -286,13 +286,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testWithIndelAllelesPassedIn() {
|
public void testWithIndelAllelesPassedIn() {
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("69b0b3f089c80b9864294d838a061336"));
|
Arrays.asList("69b0b3f089c80b9864294d838a061336"));
|
||||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf "
|
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("c90174cfd7dd68bdef36fe2c60145e10"));
|
Arrays.asList("c90174cfd7dd68bdef36fe2c60145e10"));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue