From 596c1723aeec43ebf2feb7ede387b1954910736c Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 25 Oct 2012 10:35:43 -0400 Subject: [PATCH 01/11] Hidden, unsupported ability of VariantEval to run AlleleCount stratification on sites-only VCFs. I'll expose it/add tests on it if people think this is generaly useful. User needs to specify total # of samples as command line argument since genotypes are not available. Also, fixes to large-scale validation script: lower -minIndelFrac threshold or else we'll kill most indels since default 0.25 is too high for pools, fix also VE stratifications and add one VE run where eval=1KG, comp=pool data and AC stratification based on 1KG annotation --- .../sting/gatk/walkers/varianteval/VariantEval.java | 12 ++++++++++++ .../varianteval/stratifications/AlleleCount.java | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java index a73e125ad..201028d99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java @@ -183,6 +183,10 @@ public class VariantEval extends RodWalker implements TreeRedu @Argument(fullName="keepAC0", shortName="keepAC0", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false) private boolean keepSitesWithAC0 = false; + @Hidden + @Argument(fullName="numSamples", shortName="numSamples", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false) + private int numSamplesFromArgument = 0; + /** * If true, VariantEval will treat -eval 1 -eval 2 as separate tracks from the same underlying * variant set, and evaluate the union of the results. Useful when you want to do -eval chr1.vcf -eval chr2.vcf etc. @@ -589,6 +593,14 @@ public class VariantEval extends RodWalker implements TreeRedu public boolean isSubsettingToSpecificSamples() { return isSubsettingSamples; } public Set getSampleNamesForEvaluation() { return sampleNamesForEvaluation; } + public int getNumberOfSamplesForEvaluation() { + if (sampleNamesForEvaluation!= null && !sampleNamesForEvaluation.isEmpty()) + return sampleNamesForEvaluation.size(); + else { + return numSamplesFromArgument; + } + + } public Set getSampleNamesForStratification() { return sampleNamesForStratification; } public List> getComps() { return comps; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index e6efd4482..7197fc14c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -29,7 +29,7 @@ public class AlleleCount extends VariantStratifier { // There are ploidy x n sample chromosomes // TODO -- generalize to handle multiple ploidy - nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * getVariantEvalWalker().getSamplePloidy(); + nchrom = getVariantEvalWalker().getNumberOfSamplesForEvaluation() * getVariantEvalWalker().getSamplePloidy(); if ( nchrom < 2 ) throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample"); From c8e17a7adf410aa540f6e51091160f56bd4231e2 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Tue, 30 Oct 2012 13:57:54 -0400 Subject: [PATCH 02/11] totally experimental UG feature, to be removed --- .../sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 3c4a97ec1..d9b46ad36 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -190,6 +190,9 @@ public class UnifiedGenotyperEngine { final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap); if ( vc != null ) results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap)); + else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES) + results.add(generateEmptyContext(tracker, refContext, null, rawContext)); + } } } From 89bbe73a43c6e6ebb30ee564f7b04a432139f716 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Wed, 14 Nov 2012 14:39:04 -0500 Subject: [PATCH 03/11] Commenting out CMI pipeline test that wasn't meant to be in GATK repository (why was this merged??) --- .../gatk/walkers/annotator/VariantAnnotatorEngine.java | 6 +++++- .../gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index ee4f77752..725097ddc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -277,8 +277,12 @@ public class VariantAnnotatorEngine { if ( expression.fieldName.equals("ID") ) { if ( vc.hasID() ) infoAnnotations.put(expression.fullName, vc.getID()); + } else if (expression.fieldName.equals("ALT")) { + infoAnnotations.put(expression.fullName, vc.getAlternateAllele(0).getDisplayString()); + } else if ( vc.hasAttribute(expression.fieldName) ) { - infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName)); + infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName)); + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 879a46ab0..22ed95365 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -190,8 +190,8 @@ public class UnifiedGenotyperEngine { final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap); if ( vc != null ) results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap)); - // else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES) - // results.add(generateEmptyContext(tracker, refContext, null, rawContext)); + else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES) + results.add(generateEmptyContext(tracker, refContext, null, rawContext)); } } From a68e6810c90711d88d2a829d27818034f721eb12 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Wed, 14 Nov 2012 14:45:15 -0500 Subject: [PATCH 04/11] Back off experimental code that escaped last commit, not for general use yet --- .../sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 22ed95365..80bc04845 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -190,8 +190,9 @@ public class UnifiedGenotyperEngine { final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap); if ( vc != null ) results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap)); - else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES) - results.add(generateEmptyContext(tracker, refContext, null, rawContext)); +// todo - uncomment if we want to also emit a null ref call (with no QUAL) if there's no evidence for REF and if EMIT_ALL_SITES is set +// else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES) +// results.add(generateEmptyContext(tracker, refContext, null, rawContext)); } } From b70fd4a2426a21f375776c1de54540a562539423 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Wed, 14 Nov 2012 11:08:48 -0500 Subject: [PATCH 05/11] Initial testing of the Active Region Traversal contract - TODO: many more tests and test cases --- .../traversals/TraverseActiveRegions.java | 5 +- .../utils/activeregion/ActivityProfile.java | 5 + .../traversals/TraverseActiveRegionsTest.java | 126 ++++++++++++++++++ 3 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index a2c37944a..4fe83f331 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -34,6 +34,9 @@ public class TraverseActiveRegions extends TraversalEngine workQueue = new LinkedList(); private final LinkedHashSet myReads = new LinkedHashSet(); + // package access for unit testing + ActivityProfile profile; + @Override public String getTraversalUnits() { return "active regions"; @@ -53,7 +56,7 @@ public class TraverseActiveRegions extends TraversalEngine activeRegions = new LinkedList(); - ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() ); + profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() ); ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index e96eb843d..38cfbb38d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -103,6 +103,11 @@ public class ActivityProfile { isActiveList.add(result); } + // for unit testing + public List getActiveList() { + return isActiveList; + } + public int size() { return isActiveList.size(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java new file mode 100644 index 000000000..8740a8b68 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java @@ -0,0 +1,126 @@ +package org.broadinstitute.sting.gatk.traversals; + +import org.testng.Assert; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceDictionary; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.executive.WindowMaker; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.ArrayList; +import java.util.List; + +/** + * Created with IntelliJ IDEA. + * User: thibault + * Date: 11/13/12 + * Time: 2:47 PM + */ +public class TraverseActiveRegionsTest extends BaseTest { + + private class DummyActiveRegionWalker extends ActiveRegionWalker { + private final double prob; + + public DummyActiveRegionWalker() { + this.prob = 1.0; + } + + @Override + public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + return new ActivityProfileResult(ref.getLocus(), prob); + } + + @Override + public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) { + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return 0; + } + } + + private final TraverseActiveRegions t = new TraverseActiveRegions(); + + private IndexedFastaSequenceFile reference; + private GenomeLocParser genomeLocParser; + private ActiveRegionWalker walker; + + @BeforeClass + private void init() throws FileNotFoundException { + reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference)); + SAMSequenceDictionary dictionary = reference.getSequenceDictionary(); + genomeLocParser = new GenomeLocParser(dictionary); + } + + @Test + public void testAllIntervalsSeen() throws Exception { + List intervals = new ArrayList(); + GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1); + intervals.add(interval); + + LocusShardDataProvider dataProvider = createDataProvider(intervals); + + t.traverse(walker, dataProvider, 0); + + boolean allGenomeLocsSeen = true; + for (GenomeLoc loc : intervals) { + boolean thisGenomeLocSeen = false; + for (ActivityProfileResult active : t.profile.getActiveList()) { + if (loc.equals(active.getLoc())) { + thisGenomeLocSeen = true; + break; + } + } + if (!thisGenomeLocSeen) { + allGenomeLocsSeen = false; + break; + } + } + + Assert.assertTrue(allGenomeLocsSeen, "Some intervals missing from activity profile"); + } + + private LocusShardDataProvider createDataProvider(List intervals) { + walker = new DummyActiveRegionWalker(); + + StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList()); + Shard shard = new MockLocusShard(genomeLocParser, intervals); + WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser,iterator,shard.getGenomeLocs()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + + GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + //engine.setReferenceDataSource(reference); + engine.setGenomeLocParser(genomeLocParser); + t.initialize(engine); + + return new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList()); + } +} From 855a68ae39c1f31623f9561af4f9e2b4e85a14ab Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 19 Nov 2012 08:06:58 -0500 Subject: [PATCH 06/11] Testing out the new github-hosted repos. Please ignore. --- testfile | 1 + 1 file changed, 1 insertion(+) create mode 100644 testfile diff --git a/testfile b/testfile new file mode 100644 index 000000000..6de7b8c69 --- /dev/null +++ b/testfile @@ -0,0 +1 @@ +This is a test file. From 2a16d6fa55140ff6835c57a737586fbb18d0452b Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 19 Nov 2012 08:07:19 -0500 Subject: [PATCH 07/11] Revert "Testing out the new github-hosted repos. Please ignore." This reverts commit b6bf66cd088754e7fd3d5f105ca8b2551237f183. --- testfile | 1 - 1 file changed, 1 deletion(-) delete mode 100644 testfile diff --git a/testfile b/testfile deleted file mode 100644 index 6de7b8c69..000000000 --- a/testfile +++ /dev/null @@ -1 +0,0 @@ -This is a test file. From 9fc63efc307475f6b36ffad7e425b90bcab817cf Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 19 Nov 2012 09:34:15 -0500 Subject: [PATCH 08/11] Second test of new repos. Please ignore. --- testfile | 1 + 1 file changed, 1 insertion(+) create mode 100644 testfile diff --git a/testfile b/testfile new file mode 100644 index 000000000..524acfffa --- /dev/null +++ b/testfile @@ -0,0 +1 @@ +Test file From e1a5c3ce7a97e8eb92c64cc144340c81d01c9903 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 19 Nov 2012 09:34:32 -0500 Subject: [PATCH 09/11] Revert "Second test of new repos. Please ignore." This reverts commit 077532d870ddf53ec514b98f14534ca7dbf55331. --- testfile | 1 - 1 file changed, 1 deletion(-) delete mode 100644 testfile diff --git a/testfile b/testfile deleted file mode 100644 index 524acfffa..000000000 --- a/testfile +++ /dev/null @@ -1 +0,0 @@ -Test file