Merge branch 'master' of gsa2:/humgen/gsa-scr1/chartl/dev/unstable
This commit is contained in:
commit
37eb6c92bf
|
|
@ -253,7 +253,6 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
||||||
intervalList.addAll(toolkit.getIntervals());
|
intervalList.addAll(toolkit.getIntervals());
|
||||||
|
|
||||||
|
|
||||||
// todo -- rework the whole NO_PG_TAG thing
|
|
||||||
final boolean preSorted = true;
|
final boolean preSorted = true;
|
||||||
final boolean indexOnTheFly = true;
|
final boolean indexOnTheFly = true;
|
||||||
final boolean keep_records = true;
|
final boolean keep_records = true;
|
||||||
|
|
|
||||||
|
|
@ -220,7 +220,6 @@ public class SlidingWindow {
|
||||||
regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
|
regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo -- can be more aggressive here removing until the NEW window header start location after closing the variant regions
|
|
||||||
while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
|
while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
|
||||||
readsInWindow.pollFirst();
|
readsInWindow.pollFirst();
|
||||||
}
|
}
|
||||||
|
|
@ -607,9 +606,7 @@ public class SlidingWindow {
|
||||||
toRemove.add(read);
|
toRemove.add(read);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (GATKSAMRecord read : toRemove) {
|
removeReadsFromWindow(toRemove);
|
||||||
readsInWindow.remove(read);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return allReads;
|
return allReads;
|
||||||
}
|
}
|
||||||
|
|
@ -805,9 +802,8 @@ public class SlidingWindow {
|
||||||
hetReads.add(finalizeRunningConsensus());
|
hetReads.add(finalizeRunningConsensus());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (GATKSAMRecord read : toRemove) {
|
removeReadsFromWindow(toRemove);
|
||||||
readsInWindow.remove(read);
|
|
||||||
}
|
|
||||||
return hetReads;
|
return hetReads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -924,5 +920,11 @@ public class SlidingWindow {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void removeReadsFromWindow (List<GATKSAMRecord> readsToRemove) {
|
||||||
|
for (GATKSAMRecord read : readsToRemove) {
|
||||||
|
readsInWindow.remove(read);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -41,13 +41,11 @@ import java.util.*;
|
||||||
public class GenotypingEngine {
|
public class GenotypingEngine {
|
||||||
|
|
||||||
private final boolean DEBUG;
|
private final boolean DEBUG;
|
||||||
private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
|
|
||||||
private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
|
private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
|
||||||
private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("<UNASSEMBLED_EVENT>", false);
|
private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("<UNASSEMBLED_EVENT>", false);
|
||||||
|
|
||||||
public GenotypingEngine( final boolean DEBUG, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
|
public GenotypingEngine( final boolean DEBUG ) {
|
||||||
this.DEBUG = DEBUG;
|
this.DEBUG = DEBUG;
|
||||||
this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
|
|
||||||
noCall.add(Allele.NO_CALL);
|
noCall.add(Allele.NO_CALL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -131,14 +131,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
|
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
|
||||||
protected int MIN_PRUNE_FACTOR = 1;
|
protected int MIN_PRUNE_FACTOR = 1;
|
||||||
|
|
||||||
@Advanced
|
|
||||||
@Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false)
|
|
||||||
protected boolean GENOTYPE_FULL_ACTIVE_REGION = false;
|
|
||||||
|
|
||||||
@Advanced
|
|
||||||
@Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false)
|
|
||||||
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
|
|
||||||
|
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false)
|
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false)
|
||||||
protected int gcpHMM = 10;
|
protected int gcpHMM = 10;
|
||||||
|
|
@ -248,10 +240,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
|
|
||||||
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
||||||
UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
|
UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
|
||||||
simpleUAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
simpleUAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
||||||
simpleUAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
simpleUAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
|
||||||
simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING );
|
simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||||
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING );
|
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||||
|
simpleUAC.CONTAMINATION_FRACTION = 0.0;
|
||||||
simpleUAC.exactCallsLog = null;
|
simpleUAC.exactCallsLog = null;
|
||||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||||
|
|
||||||
|
|
@ -273,15 +266,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
VCFConstants.GENOTYPE_QUALITY_KEY,
|
VCFConstants.GENOTYPE_QUALITY_KEY,
|
||||||
VCFConstants.DEPTH_KEY,
|
VCFConstants.DEPTH_KEY,
|
||||||
VCFConstants.GENOTYPE_PL_KEY);
|
VCFConstants.GENOTYPE_PL_KEY);
|
||||||
// header lines for the experimental HaplotypeCaller-specific annotations
|
|
||||||
headerInfo.add(new VCFInfoHeaderLine("NVH", 1, VCFHeaderLineType.Integer, "Number of variants found on the haplotype that contained this variant"));
|
|
||||||
headerInfo.add(new VCFInfoHeaderLine("NumHapEval", 1, VCFHeaderLineType.Integer, "Number of haplotypes that were chosen for evaluation in this active region"));
|
|
||||||
headerInfo.add(new VCFInfoHeaderLine("NumHapAssembly", 1, VCFHeaderLineType.Integer, "Number of haplotypes created during the assembly of this active region"));
|
|
||||||
headerInfo.add(new VCFInfoHeaderLine("ActiveRegionSize", 1, VCFHeaderLineType.Integer, "Number of base pairs that comprise this active region"));
|
|
||||||
headerInfo.add(new VCFInfoHeaderLine("EVENTLENGTH", 1, VCFHeaderLineType.Integer, "Max length of all the alternate alleles"));
|
|
||||||
headerInfo.add(new VCFInfoHeaderLine("TYPE", 1, VCFHeaderLineType.String, "Type of event: SNP or INDEL"));
|
|
||||||
headerInfo.add(new VCFInfoHeaderLine("extType", 1, VCFHeaderLineType.String, "Extended type of event: SNP, MNP, INDEL, or COMPLEX"));
|
|
||||||
headerInfo.add(new VCFInfoHeaderLine("QDE", 1, VCFHeaderLineType.Float, "QD value divided by the number of variants found on the haplotype that contained this variant"));
|
|
||||||
|
|
||||||
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
|
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
|
||||||
// where the filters are used. For example, in emitting all sites the lowQual field is used
|
// where the filters are used. For example, in emitting all sites the lowQual field is used
|
||||||
|
|
@ -298,7 +282,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
|
|
||||||
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
|
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
|
||||||
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
|
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
|
||||||
genotypingEngine = new GenotypingEngine( DEBUG, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
|
genotypingEngine = new GenotypingEngine( DEBUG );
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -428,43 +412,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
|
||||||
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
|
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
|
||||||
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
|
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
|
||||||
|
|
||||||
if( !GENOTYPE_FULL_ACTIVE_REGION ) {
|
|
||||||
// add some custom annotations to the calls
|
|
||||||
|
|
||||||
// Calculate the number of variants on the haplotype
|
|
||||||
int maxNumVar = 0;
|
|
||||||
for( final Allele allele : callResult.getFirst().getAlleles() ) {
|
|
||||||
if( !allele.isReference() ) {
|
|
||||||
for( final Haplotype haplotype : callResult.getSecond().get(allele) ) {
|
|
||||||
final int numVar = haplotype.getEventMap().size();
|
|
||||||
if( numVar > maxNumVar ) { maxNumVar = numVar; }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Calculate the event length
|
|
||||||
int maxLength = 0;
|
|
||||||
for ( final Allele a : annotatedCall.getAlternateAlleles() ) {
|
|
||||||
final int length = a.length() - annotatedCall.getReference().length();
|
|
||||||
if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; }
|
|
||||||
}
|
|
||||||
|
|
||||||
myAttributes.put("NVH", maxNumVar);
|
|
||||||
myAttributes.put("NumHapEval", bestHaplotypes.size());
|
|
||||||
myAttributes.put("NumHapAssembly", haplotypes.size());
|
|
||||||
myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size());
|
|
||||||
myAttributes.put("EVENTLENGTH", maxLength);
|
|
||||||
myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") );
|
|
||||||
myAttributes.put("extType", annotatedCall.getType().toString() );
|
|
||||||
|
|
||||||
//if( likelihoodCalculationEngine.haplotypeScore != null ) {
|
|
||||||
// myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore));
|
|
||||||
//}
|
|
||||||
if( annotatedCall.hasAttribute("QD") ) {
|
|
||||||
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
|
vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -520,6 +467,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) {
|
if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) {
|
||||||
final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY );
|
final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY );
|
||||||
// protect against INTERVALS with abnormally high coverage
|
// protect against INTERVALS with abnormally high coverage
|
||||||
|
// BUGBUG: remove when positinal downsampler is hooked up to ART/HC
|
||||||
if( clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) {
|
if( clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) {
|
||||||
activeRegion.add(clippedRead);
|
activeRegion.add(clippedRead);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -169,7 +169,6 @@ public class LikelihoodCalculationEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute the diploid haplotype likelihoods
|
// compute the diploid haplotype likelihoods
|
||||||
// todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
|
|
||||||
for( int iii = 0; iii < numHaplotypes; iii++ ) {
|
for( int iii = 0; iii < numHaplotypes; iii++ ) {
|
||||||
for( int jjj = 0; jjj <= iii; jjj++ ) {
|
for( int jjj = 0; jjj <= iii; jjj++ ) {
|
||||||
for( final Haplotype iii_mapped : haplotypeMapping.get(alleleOrdering.get(iii)) ) {
|
for( final Haplotype iii_mapped : haplotypeMapping.get(alleleOrdering.get(iii)) ) {
|
||||||
|
|
|
||||||
|
|
@ -21,18 +21,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSample() {
|
public void testHaplotypeCallerMultiSample() {
|
||||||
HCTest(CEUTRIO_BAM, "", "56aa4b84606b6b0b7dc78a383974d1b3");
|
HCTest(CEUTRIO_BAM, "", "2b39732ff8e0de5bc2ae949aaf7a6f21");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerSingleSample() {
|
public void testHaplotypeCallerSingleSample() {
|
||||||
HCTest(NA12878_BAM, "", "baabae06c85d416920be434939124d7f");
|
HCTest(NA12878_BAM, "", "8b217638ff585effb9cc70e9a9aa544f");
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
|
// TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleGGA() {
|
public void testHaplotypeCallerMultiSampleGGA() {
|
||||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "f2d0309fdf50d5827e9c60ed0dd07e3f");
|
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||||
|
"541aa8291f03ba33bd1ad3d731fd5657");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||||
|
|
@ -43,7 +44,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleComplex() {
|
public void testHaplotypeCallerMultiSampleComplex() {
|
||||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69");
|
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd7170cbde7df04d4fbe1da7903c31c6");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||||
|
|
@ -54,7 +55,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerSingleSampleSymbolic() {
|
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "7fbc6b9e27e374f2ffe4be952d88c7c6");
|
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "99456fc7207c1fe9f367a0d0afae87cd");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||||
|
|
@ -65,20 +66,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8");
|
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6c1631785b3f832aecab1a99f0454762");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("788176e1717bd28fc7cbc8e3efbb6100"));
|
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ec437d2d9f3ae07d155983be0155c8ed"));
|
||||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void HCTestStructuralIndels() {
|
public void HCTestStructuralIndels() {
|
||||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("96ab8253d242b851ccfc218759f79784"));
|
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("237601bbc39694c7413a332cbb656c8e"));
|
||||||
executeTest("HCTestStructuralIndels: ", spec);
|
executeTest("HCTestStructuralIndels: ", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -92,7 +93,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void HCTestReducedBam() {
|
public void HCTestReducedBam() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||||
Arrays.asList("425f1a0fb00d7145edf1c55e54346fae"));
|
Arrays.asList("40bf739fb2b1743642498efe79ea6342"));
|
||||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||||
import org.broadinstitute.sting.utils.help.HelpFormatter;
|
import org.broadinstitute.sting.utils.help.HelpFormatter;
|
||||||
|
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -288,7 +289,7 @@ public abstract class CommandLineProgram {
|
||||||
*/
|
*/
|
||||||
private static void printDocumentationReference() {
|
private static void printDocumentationReference() {
|
||||||
errorPrintf("Visit our website and forum for extensive documentation and answers to %n");
|
errorPrintf("Visit our website and forum for extensive documentation and answers to %n");
|
||||||
errorPrintf("commonly asked questions http://www.broadinstitute.org/gatk%n");
|
errorPrintf("commonly asked questions " + HelpUtils.BASE_GATK_URL + "%n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
||||||
|
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -160,7 +161,7 @@ public class CommandLineGATK extends CommandLineExecutable {
|
||||||
List<String> header = new ArrayList<String>();
|
List<String> header = new ArrayList<String>();
|
||||||
header.add(String.format("The Genome Analysis Toolkit (GATK) v%s, Compiled %s",getVersionNumber(), getBuildTime()));
|
header.add(String.format("The Genome Analysis Toolkit (GATK) v%s, Compiled %s",getVersionNumber(), getBuildTime()));
|
||||||
header.add("Copyright (c) 2010 The Broad Institute");
|
header.add("Copyright (c) 2010 The Broad Institute");
|
||||||
header.add("For support and documentation go to http://www.broadinstitute.org/gatk");
|
header.add("For support and documentation go to " + HelpUtils.BASE_GATK_URL);
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,11 +25,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.filters;
|
package org.broadinstitute.sting.gatk.filters;
|
||||||
|
|
||||||
import com.google.common.base.Function;
|
|
||||||
import com.google.common.collect.Collections2;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
||||||
|
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -73,7 +71,7 @@ public class FilterManager extends PluginManager<ReadFilter> {
|
||||||
|
|
||||||
return String.format("Read filter %s not found. Available read filters:%n%n%s%n%n%s",pluginName,
|
return String.format("Read filter %s not found. Available read filters:%n%n%s%n%n%s",pluginName,
|
||||||
userFriendlyListofReadFilters(availableFilters),
|
userFriendlyListofReadFilters(availableFilters),
|
||||||
"Please consult the GATK Documentation (http://www.broadinstitute.org/gatk/gatkdocs/) for more information.");
|
"Please consult the GATK Documentation (" + HelpUtils.GATK_DOCS_URL + ") for more information.");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String userFriendlyListofReadFilters(List<Class<? extends ReadFilter>> filters) {
|
private String userFriendlyListofReadFilters(List<Class<? extends ReadFilter>> filters) {
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,6 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
}
|
}
|
||||||
|
|
||||||
// skip this location -- it's not part of our engine intervals
|
// skip this location -- it's not part of our engine intervals
|
||||||
// TODO -- this is dangerously slow with current overlaps implementation : GSA-649 / GenomeLocSortedSet.overlaps is crazy slow
|
|
||||||
if ( outsideEngineIntervals(location) )
|
if ( outsideEngineIntervals(location) )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
|
||||||
final List<Double> refQuals, final List<Double> altQuals) {
|
final List<Double> refQuals, final List<Double> altQuals) {
|
||||||
|
|
||||||
if (pileup != null && likelihoodMap == null) {
|
if (pileup != null && likelihoodMap == null) {
|
||||||
// no per-read likelihoods available:
|
// old UG snp-only path through the annotations
|
||||||
for ( final PileupElement p : pileup ) {
|
for ( final PileupElement p : pileup ) {
|
||||||
if ( isUsableBase(p) ) {
|
if ( isUsableBase(p) ) {
|
||||||
if ( allAlleles.get(0).equals(Allele.create(p.getBase(), true)) ) {
|
if ( allAlleles.get(0).equals(Allele.create(p.getBase(), true)) ) {
|
||||||
|
|
@ -43,14 +43,13 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
|
||||||
}
|
}
|
||||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
|
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||||
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||||
|
// BUGBUG: There needs to be a comparable isUsableBase check here
|
||||||
if (a.isNoCall())
|
if (a.isNoCall())
|
||||||
continue; // read is non-informative
|
continue; // read is non-informative
|
||||||
if (a.isReference())
|
if (a.isReference())
|
||||||
refQuals.add((double)el.getKey().getMappingQuality());
|
refQuals.add((double)el.getKey().getMappingQuality());
|
||||||
else if (allAlleles.contains(a))
|
else if (allAlleles.contains(a))
|
||||||
altQuals.add((double)el.getKey().getMappingQuality());
|
altQuals.add((double)el.getKey().getMappingQuality());
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
|
||||||
ReadBackedPileup pileup = null;
|
ReadBackedPileup pileup = null;
|
||||||
|
|
||||||
|
|
||||||
if (stratifiedContexts != null) {
|
if (stratifiedContexts != null) { // the old UG SNP-only path through the annotations
|
||||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||||
if ( context != null )
|
if ( context != null )
|
||||||
pileup = context.getBasePileup();
|
pileup = context.getBasePileup();
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
|
||||||
final List<Double> refQuals, final List<Double> altQuals) {
|
final List<Double> refQuals, final List<Double> altQuals) {
|
||||||
|
|
||||||
if (alleleLikelihoodMap == null) {
|
if (alleleLikelihoodMap == null) {
|
||||||
// use fast SNP-based version if we don't have per-read allele likelihoods
|
// use old UG SNP-based version if we don't have per-read allele likelihoods
|
||||||
for ( final PileupElement p : pileup ) {
|
for ( final PileupElement p : pileup ) {
|
||||||
if ( isUsableBase(p) ) {
|
if ( isUsableBase(p) ) {
|
||||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
|
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
|
||||||
|
|
|
||||||
|
|
@ -282,7 +282,8 @@ public class DiffEngine {
|
||||||
// now that we have a specific list of values we want to show, display them
|
// now that we have a specific list of values we want to show, display them
|
||||||
GATKReport report = new GATKReport();
|
GATKReport report = new GATKReport();
|
||||||
final String tableName = "differences";
|
final String tableName = "differences";
|
||||||
report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", 3);
|
// TODO for Geraldine -- link needs to be updated below
|
||||||
|
report.addTable(tableName, "Summarized differences between the master and test files. See [ask Geraldine to fix link to DiffEngine wiki] for more information", 3);
|
||||||
final GATKReportTable table = report.getTable(tableName);
|
final GATKReportTable table = report.getTable(tableName);
|
||||||
table.addColumn("Difference");
|
table.addColumn("Difference");
|
||||||
table.addColumn("NumberOfOccurrences");
|
table.addColumn("NumberOfOccurrences");
|
||||||
|
|
|
||||||
|
|
@ -138,7 +138,8 @@ public class DiffObjects extends RodWalker<Integer, Integer> {
|
||||||
/**
|
/**
|
||||||
* Writes out a file of the DiffEngine format:
|
* Writes out a file of the DiffEngine format:
|
||||||
*
|
*
|
||||||
* http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine
|
* TODO for Geraldine -- link needs to be updated below (and also in SelectVariants and RefSeqCodec GATK docs)
|
||||||
|
* http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine
|
||||||
*/
|
*/
|
||||||
@Output(doc="File to which results should be written",required=true)
|
@Output(doc="File to which results should be written",required=true)
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,12 @@ import java.util.List;
|
||||||
* <p>
|
* <p>
|
||||||
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
|
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
|
||||||
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
|
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
|
||||||
|
*
|
||||||
|
* The output format can be partially controlled using the provided command-line arguments.
|
||||||
|
* Specify intervals with the usual -L argument to output only the reference bases within your intervals.
|
||||||
|
* Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
|
||||||
|
* separate fasta sequence (named numerically in order).
|
||||||
|
*
|
||||||
* Several important notes:
|
* Several important notes:
|
||||||
* 1) if there are multiple variants that start at a site, it chooses one of them randomly.
|
* 1) if there are multiple variants that start at a site, it chooses one of them randomly.
|
||||||
* 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
|
* 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ import java.util.Set;
|
||||||
/**
|
/**
|
||||||
* Stratifies the eval RODs by user-supplied JEXL expressions
|
* Stratifies the eval RODs by user-supplied JEXL expressions
|
||||||
*
|
*
|
||||||
* See http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions for more details
|
* See http://gatkforums.broadinstitute.org/discussion/1255/what-are-jexl-expressions-and-how-can-i-use-them-with-the-gatk for more details
|
||||||
*/
|
*/
|
||||||
public class JexlExpression extends VariantStratifier implements StandardStratification {
|
public class JexlExpression extends VariantStratifier implements StandardStratification {
|
||||||
// needs to know the jexl expressions
|
// needs to know the jexl expressions
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -80,7 +81,7 @@ public class VariantDataManager {
|
||||||
final double theSTD = standardDeviation(theMean, iii);
|
final double theSTD = standardDeviation(theMean, iii);
|
||||||
logger.info( annotationKeys.get(iii) + String.format(": \t mean = %.2f\t standard deviation = %.2f", theMean, theSTD) );
|
logger.info( annotationKeys.get(iii) + String.format(": \t mean = %.2f\t standard deviation = %.2f", theMean, theSTD) );
|
||||||
if( Double.isNaN(theMean) ) {
|
if( Double.isNaN(theMean) ) {
|
||||||
throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See http://www.broadinstitute.org/gsa/wiki/index.php/VariantAnnotator");
|
throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See " + HelpUtils.GATK_FORUM_URL + "discussion/49/using-variant-annotator");
|
||||||
}
|
}
|
||||||
|
|
||||||
foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6);
|
foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6);
|
||||||
|
|
|
||||||
|
|
@ -51,8 +51,7 @@ import java.util.*;
|
||||||
* The Variant Validation Assessor is a tool for vetting/assessing validation data (containing genotypes).
|
* The Variant Validation Assessor is a tool for vetting/assessing validation data (containing genotypes).
|
||||||
* The tool produces a VCF that is annotated with information pertaining to plate quality control and by
|
* The tool produces a VCF that is annotated with information pertaining to plate quality control and by
|
||||||
* default is soft-filtered by high no-call rate or low Hardy-Weinberg probability.
|
* default is soft-filtered by high no-call rate or low Hardy-Weinberg probability.
|
||||||
* If you have .ped files, please first convert them to VCF format
|
* If you have .ped files, please first convert them to VCF format.
|
||||||
* (see http://www.broadinstitute.org/gsa/wiki/index.php/Converting_ped_to_vcf).
|
|
||||||
*
|
*
|
||||||
* <h2>Input</h2>
|
* <h2>Input</h2>
|
||||||
* <p>
|
* <p>
|
||||||
|
|
|
||||||
|
|
@ -374,7 +374,7 @@ public final class GenomeLocParser {
|
||||||
int start = 1;
|
int start = 1;
|
||||||
int stop = -1;
|
int stop = -1;
|
||||||
|
|
||||||
final int colonIndex = str.indexOf(":");
|
final int colonIndex = str.lastIndexOf(":");
|
||||||
if(colonIndex == -1) {
|
if(colonIndex == -1) {
|
||||||
contig = str.substring(0, str.length()); // chr1
|
contig = str.substring(0, str.length()); // chr1
|
||||||
stop = Integer.MAX_VALUE;
|
stop = Integer.MAX_VALUE;
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,9 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
// our private storage for the GenomeLoc's
|
// our private storage for the GenomeLoc's
|
||||||
private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
|
private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
|
||||||
|
|
||||||
|
// cache this to make overlap checking much more efficient
|
||||||
|
private int previousOverlapSearchIndex = -1;
|
||||||
|
|
||||||
/** default constructor */
|
/** default constructor */
|
||||||
public GenomeLocSortedSet(GenomeLocParser parser) {
|
public GenomeLocSortedSet(GenomeLocParser parser) {
|
||||||
this.genomeLocParser = parser;
|
this.genomeLocParser = parser;
|
||||||
|
|
@ -101,7 +104,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
* Return the number of bps before loc in the sorted set
|
* Return the number of bps before loc in the sorted set
|
||||||
*
|
*
|
||||||
* @param loc the location before which we are counting bases
|
* @param loc the location before which we are counting bases
|
||||||
* @return
|
* @return the number of base pairs over all previous intervals
|
||||||
*/
|
*/
|
||||||
public long sizeBeforeLoc(GenomeLoc loc) {
|
public long sizeBeforeLoc(GenomeLoc loc) {
|
||||||
long s = 0;
|
long s = 0;
|
||||||
|
|
@ -110,7 +113,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
if ( e.isBefore(loc) )
|
if ( e.isBefore(loc) )
|
||||||
s += e.size();
|
s += e.size();
|
||||||
else if ( e.isPast(loc) )
|
else if ( e.isPast(loc) )
|
||||||
; // don't do anything
|
break; // we are done
|
||||||
else // loc is inside of s
|
else // loc is inside of s
|
||||||
s += loc.getStart() - e.getStart();
|
s += loc.getStart() - e.getStart();
|
||||||
}
|
}
|
||||||
|
|
@ -131,15 +134,43 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
* Determine if the given loc overlaps any loc in the sorted set
|
* Determine if the given loc overlaps any loc in the sorted set
|
||||||
*
|
*
|
||||||
* @param loc the location to test
|
* @param loc the location to test
|
||||||
* @return
|
* @return trip if the location overlaps any loc
|
||||||
*/
|
*/
|
||||||
public boolean overlaps(final GenomeLoc loc) {
|
public boolean overlaps(final GenomeLoc loc) {
|
||||||
for(final GenomeLoc e : mArray) {
|
// edge condition
|
||||||
if(e.overlapsP(loc)) {
|
if ( mArray.isEmpty() )
|
||||||
return true;
|
return false;
|
||||||
}
|
|
||||||
|
// use the cached version first
|
||||||
|
if ( previousOverlapSearchIndex != -1 && overlapsAtOrImmediatelyAfterCachedIndex(loc, true) )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// update the cached index
|
||||||
|
previousOverlapSearchIndex = Collections.binarySearch(mArray, loc);
|
||||||
|
|
||||||
|
// if it matches an interval exactly, we are done
|
||||||
|
if ( previousOverlapSearchIndex >= 0 )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// check whether it overlaps the interval before or after the insertion point
|
||||||
|
previousOverlapSearchIndex = Math.max(0, -1 * previousOverlapSearchIndex - 2);
|
||||||
|
return overlapsAtOrImmediatelyAfterCachedIndex(loc, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean overlapsAtOrImmediatelyAfterCachedIndex(final GenomeLoc loc, final boolean updateCachedIndex) {
|
||||||
|
// check the cached entry
|
||||||
|
if ( mArray.get(previousOverlapSearchIndex).overlapsP(loc) )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// check the entry after the cached entry since we may have moved to it
|
||||||
|
boolean returnValue = false;
|
||||||
|
if ( previousOverlapSearchIndex < mArray.size() - 1 ) {
|
||||||
|
returnValue = mArray.get(previousOverlapSearchIndex + 1).overlapsP(loc);
|
||||||
|
if ( updateCachedIndex )
|
||||||
|
previousOverlapSearchIndex++;
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
|
return returnValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -155,7 +186,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||||
mArray.add(e);
|
mArray.add(e);
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
int loc = Collections.binarySearch(mArray,e);
|
final int loc = Collections.binarySearch(mArray,e);
|
||||||
if (loc >= 0) {
|
if (loc >= 0) {
|
||||||
throw new ReviewedStingException("Genome Loc Sorted Set already contains the GenomicLoc " + e.toString());
|
throw new ReviewedStingException("Genome Loc Sorted Set already contains the GenomicLoc " + e.toString());
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -705,11 +705,13 @@ public class Utils {
|
||||||
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
|
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
|
||||||
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
|
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
|
||||||
for ( SAMProgramRecord record : oldRecords )
|
for ( SAMProgramRecord record : oldRecords )
|
||||||
if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
|
if ( (programRecord != null && !record.getId().startsWith(programRecord.getId())) || KEEP_ALL_PG_RECORDS )
|
||||||
newRecords.add(record);
|
newRecords.add(record);
|
||||||
|
|
||||||
newRecords.add(programRecord);
|
if (programRecord != null) {
|
||||||
header.setProgramRecords(newRecords);
|
newRecords.add(programRecord);
|
||||||
|
header.setProgramRecords(newRecords);
|
||||||
|
}
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
|
@ -267,7 +268,7 @@ public class UserException extends ReviewedStingException {
|
||||||
|
|
||||||
public static class ReadMissingReadGroup extends MalformedBAM {
|
public static class ReadMissingReadGroup extends MalformedBAM {
|
||||||
public ReadMissingReadGroup(SAMRecord read) {
|
public ReadMissingReadGroup(SAMRecord read) {
|
||||||
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
|
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use " + HelpUtils.GATK_FORUM_URL + "discussion/59/companion-utilities-replacereadgroups to fix this problem", read.getReadName()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -343,7 +344,7 @@ public class UserException extends ReviewedStingException {
|
||||||
super(String.format("Lexicographically sorted human genome sequence detected in %s."
|
super(String.format("Lexicographically sorted human genome sequence detected in %s."
|
||||||
+ "\nFor safety's sake the GATK requires human contigs in karyotypic order: 1, 2, ..., 10, 11, ..., 20, 21, 22, X, Y with M either leading or trailing these contigs."
|
+ "\nFor safety's sake the GATK requires human contigs in karyotypic order: 1, 2, ..., 10, 11, ..., 20, 21, 22, X, Y with M either leading or trailing these contigs."
|
||||||
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
|
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
|
||||||
+ "\nYou can use the ReorderSam utility to fix this problem: http://www.broadinstitute.org/gsa/wiki/index.php/ReorderSam"
|
+ "\nYou can use the ReorderSam utility to fix this problem: " + HelpUtils.GATK_FORUM_URL + "discussion/58/companion-utilities-reordersam"
|
||||||
+ "\n %s contigs = %s",
|
+ "\n %s contigs = %s",
|
||||||
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
|
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -44,14 +44,13 @@ public class ForumAPIUtils {
|
||||||
/**
|
/**
|
||||||
* How we post to the forum
|
* How we post to the forum
|
||||||
*/
|
*/
|
||||||
private final static String API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
|
|
||||||
final private static String ACCESS_TOKEN = "access_token=";
|
final private static String ACCESS_TOKEN = "access_token=";
|
||||||
|
|
||||||
public static List<String> getPostedTools(String forumKey) {
|
public static List<String> getPostedTools(String forumKey) {
|
||||||
Gson gson = new Gson();
|
Gson gson = new Gson();
|
||||||
List<String> output = new ArrayList<String>();
|
List<String> output = new ArrayList<String>();
|
||||||
|
|
||||||
String text = httpGet(API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
|
String text = httpGet(HelpUtils.GATK_FORUM_API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
|
||||||
|
|
||||||
APIQuery details = gson.fromJson(text, APIQuery.class);
|
APIQuery details = gson.fromJson(text, APIQuery.class);
|
||||||
ForumDiscussion[] discussions = details.Discussions;
|
ForumDiscussion[] discussions = details.Discussions;
|
||||||
|
|
@ -159,7 +158,7 @@ public class ForumAPIUtils {
|
||||||
Gson gson = new Gson();
|
Gson gson = new Gson();
|
||||||
|
|
||||||
String data = gson.toJson(post.getPostData());
|
String data = gson.toJson(post.getPostData());
|
||||||
httpPost(data, API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
|
httpPost(data, HelpUtils.GATK_FORUM_API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -167,8 +166,7 @@ public class ForumAPIUtils {
|
||||||
class APIQuery {
|
class APIQuery {
|
||||||
ForumDiscussion[] Discussions;
|
ForumDiscussion[] Discussions;
|
||||||
|
|
||||||
public APIQuery() {
|
public APIQuery() {}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ public class GATKDocUtils {
|
||||||
/**
|
/**
|
||||||
* The URL root for RELEASED GATKDOC units
|
* The URL root for RELEASED GATKDOC units
|
||||||
*/
|
*/
|
||||||
public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gatk/gatkdocs/";
|
public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = HelpUtils.GATK_DOCS_URL;
|
||||||
/**
|
/**
|
||||||
* The URL root for STABLE GATKDOC units
|
* The URL root for STABLE GATKDOC units
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,15 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||||
import java.lang.reflect.Field;
|
import java.lang.reflect.Field;
|
||||||
|
|
||||||
public class HelpUtils {
|
public class HelpUtils {
|
||||||
|
|
||||||
|
public final static String BASE_GATK_URL = "http://www.broadinstitute.org/gatk";
|
||||||
|
public final static String GATK_DOCS_URL = BASE_GATK_URL + "/gatkdocs/";
|
||||||
|
public final static String GATK_FORUM_URL = "http://gatkforums.broadinstitute.org/";
|
||||||
|
public final static String GATK_FORUM_API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
|
protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
|
||||||
try {
|
try {
|
||||||
Class type = getClassForDoc(classDoc);
|
Class type = getClassForDoc(classDoc);
|
||||||
|
|
|
||||||
|
|
@ -71,7 +71,6 @@ abstract class SortingVariantContextWriterBase implements VariantContextWriter {
|
||||||
this.takeOwnershipOfInner = takeOwnershipOfInner;
|
this.takeOwnershipOfInner = takeOwnershipOfInner;
|
||||||
|
|
||||||
// has to be PriorityBlockingQueue to be thread-safe
|
// has to be PriorityBlockingQueue to be thread-safe
|
||||||
// see http://getsatisfaction.com/gsa/topics/missing_loci_output_in_multi_thread_mode_when_implement_sortingvcfwriterbase?utm_content=topic_link&utm_medium=email&utm_source=new_topic
|
|
||||||
this.queue = new PriorityBlockingQueue<VCFRecord>(50, new VariantContextComparator());
|
this.queue = new PriorityBlockingQueue<VCFRecord>(50, new VariantContextComparator());
|
||||||
|
|
||||||
this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
|
this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -74,6 +76,23 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
||||||
genomeLocParser.parseGenomeLoc("Bad:0-1");
|
genomeLocParser.parseGenomeLoc("Bad:0-1");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testContigHasColon() {
|
||||||
|
SAMFileHeader header = new SAMFileHeader();
|
||||||
|
header.setSortOrder(net.sf.samtools.SAMFileHeader.SortOrder.coordinate);
|
||||||
|
SAMSequenceDictionary dict = new SAMSequenceDictionary();
|
||||||
|
SAMSequenceRecord rec = new SAMSequenceRecord("c:h:r1", 10);
|
||||||
|
rec.setSequenceLength(10);
|
||||||
|
dict.addSequence(rec);
|
||||||
|
header.setSequenceDictionary(dict);
|
||||||
|
|
||||||
|
final GenomeLocParser myGenomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||||
|
GenomeLoc loc = myGenomeLocParser.parseGenomeLoc("c:h:r1:4-5");
|
||||||
|
assertEquals(0, loc.getContigIndex());
|
||||||
|
assertEquals(loc.getStart(), 4);
|
||||||
|
assertEquals(loc.getStop(), 5);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseGoodString() {
|
public void testParseGoodString() {
|
||||||
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10");
|
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10");
|
||||||
|
|
@ -81,7 +100,7 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
||||||
assertEquals(loc.getStop(), 10);
|
assertEquals(loc.getStop(), 10);
|
||||||
assertEquals(loc.getStart(), 1);
|
assertEquals(loc.getStart(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCreateGenomeLoc1() {
|
public void testCreateGenomeLoc1() {
|
||||||
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
|
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
|
||||||
import static org.testng.Assert.assertEquals;
|
import static org.testng.Assert.assertEquals;
|
||||||
|
import static org.testng.Assert.assertFalse;
|
||||||
import static org.testng.Assert.assertTrue;
|
import static org.testng.Assert.assertTrue;
|
||||||
|
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
|
|
@ -117,6 +118,41 @@ public class GenomeLocSortedSetUnitTest extends BaseTest {
|
||||||
assertTrue(loc.getContigIndex() == 1);
|
assertTrue(loc.getContigIndex() == 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void overlap() {
|
||||||
|
for ( int i = 1; i < 6; i++ ) {
|
||||||
|
final int start = i * 10;
|
||||||
|
mSortedSet.add(genomeLocParser.createGenomeLoc(contigOneName, start, start + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// test matches in and around interval
|
||||||
|
assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 9, 9)));
|
||||||
|
assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 10, 10)));
|
||||||
|
assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 11, 11)));
|
||||||
|
assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 12, 12)));
|
||||||
|
|
||||||
|
// test matches spanning intervals
|
||||||
|
assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 14, 20)));
|
||||||
|
assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 11, 15)));
|
||||||
|
assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 30, 40)));
|
||||||
|
assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 51, 53)));
|
||||||
|
|
||||||
|
// test miss
|
||||||
|
assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 12, 19)));
|
||||||
|
|
||||||
|
// test exact match after miss
|
||||||
|
assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 40, 41)));
|
||||||
|
|
||||||
|
// test matches at beginning of intervals
|
||||||
|
assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 5, 6)));
|
||||||
|
assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 0, 10)));
|
||||||
|
|
||||||
|
// test matches at end of intervals
|
||||||
|
assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 52, 53)));
|
||||||
|
assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 51, 53)));
|
||||||
|
assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 52, 53)));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void mergingOverlappingAbove() {
|
public void mergingOverlappingAbove() {
|
||||||
GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 0, 50);
|
GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 0, 50);
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,6 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
// See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic
|
|
||||||
public void testReadingAndWritingBreakpointAlleles() {
|
public void testReadingAndWritingBreakpointAlleles() {
|
||||||
String testVCF = privateTestDir + "breakpoint-example.vcf";
|
String testVCF = privateTestDir + "breakpoint-example.vcf";
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue