Merge pull request #98 from broadinstitute/rp_hc_glm_both
Use the indel heterozygosity prior when calling indels with the HC
This commit is contained in:
commit
72f9abfcab
|
|
@ -74,6 +74,12 @@ public class StandardCallerArgumentCollection {
|
||||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
||||||
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
|
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This argument informs the prior probability of having an indel at a site.
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
|
||||||
|
public double INDEL_HETEROZYGOSITY = 1.0/8000;
|
||||||
|
|
||||||
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
|
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
|
||||||
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
|
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
|
||||||
|
|
||||||
|
|
@ -179,6 +185,7 @@ public class StandardCallerArgumentCollection {
|
||||||
this.alleles = SCAC.alleles;
|
this.alleles = SCAC.alleles;
|
||||||
this.GenotypingMode = SCAC.GenotypingMode;
|
this.GenotypingMode = SCAC.GenotypingMode;
|
||||||
this.heterozygosity = SCAC.heterozygosity;
|
this.heterozygosity = SCAC.heterozygosity;
|
||||||
|
this.INDEL_HETEROZYGOSITY = SCAC.INDEL_HETEROZYGOSITY;
|
||||||
this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
|
this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
|
||||||
this.OutputMode = SCAC.OutputMode;
|
this.OutputMode = SCAC.OutputMode;
|
||||||
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
|
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
|
||||||
|
|
|
||||||
|
|
@ -113,12 +113,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
||||||
@Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
|
@Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
|
||||||
public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
|
public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
|
||||||
|
|
||||||
/**
|
|
||||||
* This argument informs the prior probability of having an indel at a site.
|
|
||||||
*/
|
|
||||||
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
|
|
||||||
public double INDEL_HETEROZYGOSITY = 1.0/8000;
|
|
||||||
|
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false)
|
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false)
|
||||||
public byte INDEL_GAP_CONTINUATION_PENALTY = 10;
|
public byte INDEL_GAP_CONTINUATION_PENALTY = 10;
|
||||||
|
|
@ -238,7 +232,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
||||||
this.MAX_DELETION_FRACTION = uac.MAX_DELETION_FRACTION;
|
this.MAX_DELETION_FRACTION = uac.MAX_DELETION_FRACTION;
|
||||||
this.MIN_INDEL_COUNT_FOR_GENOTYPING = uac.MIN_INDEL_COUNT_FOR_GENOTYPING;
|
this.MIN_INDEL_COUNT_FOR_GENOTYPING = uac.MIN_INDEL_COUNT_FOR_GENOTYPING;
|
||||||
this.MIN_INDEL_FRACTION_PER_SAMPLE = uac.MIN_INDEL_FRACTION_PER_SAMPLE;
|
this.MIN_INDEL_FRACTION_PER_SAMPLE = uac.MIN_INDEL_FRACTION_PER_SAMPLE;
|
||||||
this.INDEL_HETEROZYGOSITY = uac.INDEL_HETEROZYGOSITY;
|
|
||||||
this.INDEL_GAP_OPEN_PENALTY = uac.INDEL_GAP_OPEN_PENALTY;
|
this.INDEL_GAP_OPEN_PENALTY = uac.INDEL_GAP_OPEN_PENALTY;
|
||||||
this.INDEL_GAP_CONTINUATION_PENALTY = uac.INDEL_GAP_CONTINUATION_PENALTY;
|
this.INDEL_GAP_CONTINUATION_PENALTY = uac.INDEL_GAP_CONTINUATION_PENALTY;
|
||||||
this.OUTPUT_DEBUG_INDEL_INFO = uac.OUTPUT_DEBUG_INDEL_INFO;
|
this.OUTPUT_DEBUG_INDEL_INFO = uac.OUTPUT_DEBUG_INDEL_INFO;
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,7 @@ import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
@ -267,7 +268,7 @@ public class GenotypingEngine {
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
|
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
|
||||||
|
|
||||||
final GenotypesContext genotypes = calculateGLsForThisEvent( samples, alleleReadMap, mergedVC );
|
final GenotypesContext genotypes = calculateGLsForThisEvent( samples, alleleReadMap, mergedVC );
|
||||||
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
|
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||||
if( call != null ) {
|
if( call != null ) {
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
|
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
|
||||||
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0, UG_engine.getUAC().contaminationLog ) );
|
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0, UG_engine.getUAC().contaminationLog ) );
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleComplex() {
|
public void testHaplotypeCallerMultiSampleComplex() {
|
||||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a960722c1ae2b6f774d3443a7e5ac27d");
|
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a2232995ca9bec143e664748845a0045");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||||
|
|
@ -75,7 +75,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
||||||
// TODO -- need a better symbolic allele test
|
// TODO -- need a better symbolic allele test
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerSingleSampleSymbolic() {
|
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "56f2ef9acc6c0d267cf2b7a447d87fb7");
|
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "298c1af47a515ea7c8c1ea704d7755ce");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestComplexGGA(String bam, String args, String md5) {
|
private void HCTestComplexGGA(String bam, String args, String md5) {
|
||||||
|
|
@ -93,6 +93,6 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||||
"f2df7a8f53ce449e4a8e8f8496e7c745");
|
"9563e3c1eee2ef46afc7822af0bb58a8");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSample() {
|
public void testHaplotypeCallerMultiSample() {
|
||||||
HCTest(CEUTRIO_BAM, "", "aac5517a0a64ad291b6b00825d982f7f");
|
HCTest(CEUTRIO_BAM, "", "8f33e40686443b9a72de45d5a9da1861");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerSingleSample() {
|
public void testHaplotypeCallerSingleSample() {
|
||||||
HCTest(NA12878_BAM, "", "3bfab723fb0f3a65998d82152b67ed15");
|
HCTest(NA12878_BAM, "", "8f2b047cdace0ef122d6ad162e7bc5b9");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = false)
|
||||||
|
|
@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleGGA() {
|
public void testHaplotypeCallerMultiSampleGGA() {
|
||||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||||
"283524b3e3397634d4cf0dc2b8723002");
|
"9d4be26a2c956ba4b7b4044820eab030");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||||
|
|
@ -95,7 +95,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "f1f867dbbe3747f16a0d9e5f11e6ed64");
|
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "5af4782a0e1bc9b966b9e3ae76245919");
|
||||||
}
|
}
|
||||||
|
|
||||||
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
||||||
|
|
@ -112,7 +112,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void HCTestStructuralIndels() {
|
public void HCTestStructuralIndels() {
|
||||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a17e95c1191e3aef7892586fe38ca050"));
|
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("03557376242bdf78c5237703b762573b"));
|
||||||
executeTest("HCTestStructuralIndels: ", spec);
|
executeTest("HCTestStructuralIndels: ", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -142,7 +142,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void testReducedBamWithReadsNotFullySpanningDeletion() {
|
public void testReducedBamWithReadsNotFullySpanningDeletion() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
||||||
Arrays.asList("6debe567cd5ed7eb5756b6605a151f56"));
|
Arrays.asList("a43c595a617589388ff3d7e2ddc661e7"));
|
||||||
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue