From 1ead00cac5d027b45633b58bb8701f07f57c7166 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Sun, 18 Dec 2011 19:04:26 -0500 Subject: [PATCH 1/8] New fork of SamFileHeaderMerger should be cached at the thread level to enable fast (and valid) thread lookups. --- .../gatk/datasources/reads/SAMDataSource.java | 77 +++++++++++-------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 75a851d65..2e243b847 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -100,11 +100,6 @@ public class SAMDataSource { */ private final Map readerPositions = new HashMap(); - /** - * Cached representation of the merged header used to generate a merging iterator. - */ - private final SamFileHeaderMerger headerMerger; - /** * The merged header. */ @@ -300,9 +295,8 @@ public class SAMDataSource { initializeReaderPositions(readers); - headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true); - mergedHeader = headerMerger.getMergedHeader(); - hasReadGroupCollisions = headerMerger.hasReadGroupCollisions(); + mergedHeader = readers.getMergedHeader(); + hasReadGroupCollisions = readers.hasReadGroupCollisions(); readProperties = new ReadProperties( samFiles, @@ -327,9 +321,9 @@ public class SAMDataSource { List readGroups = reader.getFileHeader().getReadGroups(); for(SAMReadGroupRecord readGroup: readGroups) { - if(headerMerger.hasReadGroupCollisions()) { - mappingToMerged.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId())); - mergedToOriginalReadGroupMappings.put(headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()),readGroup.getReadGroupId()); + if(hasReadGroupCollisions) { + mappingToMerged.put(readGroup.getReadGroupId(),readers.getReadGroupId(id,readGroup.getReadGroupId())); + mergedToOriginalReadGroupMappings.put(readers.getReadGroupId(id,readGroup.getReadGroupId()),readGroup.getReadGroupId()); } else { mappingToMerged.put(readGroup.getReadGroupId(),readGroup.getReadGroupId()); mergedToOriginalReadGroupMappings.put(readGroup.getReadGroupId(),readGroup.getReadGroupId()); @@ -562,7 +556,7 @@ public class SAMDataSource { */ private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) { // Set up merging to dynamically merge together multiple BAMs. - MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true); + MergingSamRecordIterator mergingIterator = readers.createMergingIterator(); for(SAMReaderID id: getReaderIDs()) { CloseableIterator iterator = null; @@ -707,6 +701,11 @@ public class SAMDataSource { * A collection of readers derived from a reads metadata structure. */ private class SAMReaders implements Iterable { + /** + * Cached representation of the merged header used to generate a merging iterator. + */ + private final SamFileHeaderMerger headerMerger; + /** * Internal storage for a map of id -> reader. */ @@ -798,6 +797,11 @@ public class SAMDataSource { } if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime())); + + Collection headers = new LinkedList(); + for(SAMFileReader reader: readers.values()) + headers.add(reader.getFileHeader()); + headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true); } final private void printReaderPerformance(final int nExecutedTotal, @@ -814,7 +818,37 @@ public class SAMDataSource { nExecutedInTick, tickDurationInSec, nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond, nRemaining, estTimeToComplete, estTimeToComplete / 60)); + } + /** + * Return the header derived from the merging of these BAM files. + * @return the merged header. + */ + public SAMFileHeader getMergedHeader() { + return headerMerger.getMergedHeader(); + } + + /** + * Do multiple read groups collide in this dataset? + * @return True if multiple read groups collide; false otherwis. + */ + public boolean hasReadGroupCollisions() { + return headerMerger.hasReadGroupCollisions(); + } + + /** + * Get the newly mapped read group ID for the given read group. + * @param readerID Reader for which to discern the transformed ID. + * @param originalReadGroupID Original read group. + * @return Remapped read group. + */ + public String getReadGroupId(final SAMReaderID readerID, final String originalReadGroupID) { + SAMFileHeader header = readers.get(readerID).getFileHeader(); + return headerMerger.getReadGroupId(header,originalReadGroupID); + } + + public MergingSamRecordIterator createMergingIterator() { + return new MergingSamRecordIterator(headerMerger,readers.values(),true); } /** @@ -866,25 +900,6 @@ public class SAMDataSource { public boolean isEmpty() { return readers.isEmpty(); } - - /** - * Gets all the actual readers out of this data structure. - * @return A collection of the readers. - */ - public Collection values() { - return readers.values(); - } - - /** - * Gets all the actual readers out of this data structure. - * @return A collection of the readers. - */ - public Collection headers() { - ArrayList headers = new ArrayList(readers.size()); - for (SAMFileReader reader : values()) - headers.add(reader.getFileHeader()); - return headers; - } } class ReaderInitializer implements Callable { From 5b678e3b94a086c40a8cd7baf18dba713d07fe50 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sun, 18 Dec 2011 12:26:06 -0500 Subject: [PATCH 2/8] Remove ClippingOp UnitTests * all testing functionality is in the ReadClipperUnitTest, no need to double test. * class and package naming cleanup --- .../sting/gatk/walkers/ClipReadsWalker.java | 6 +- .../{clipreads => clipping}/ClippingOp.java | 2 +- .../ClippingRepresentation.java | 2 +- .../{clipreads => clipping}/ReadClipper.java | 2 +- .../ReadClipperTestUtils.java} | 8 ++- .../ReadClipperUnitTest.java | 24 +++---- .../utils/clipreads/ClippingOpUnitTest.java | 67 ------------------- 7 files changed, 23 insertions(+), 88 deletions(-) rename public/java/src/org/broadinstitute/sting/utils/{clipreads => clipping}/ClippingOp.java (99%) rename public/java/src/org/broadinstitute/sting/utils/{clipreads => clipping}/ClippingRepresentation.java (95%) rename public/java/src/org/broadinstitute/sting/utils/{clipreads => clipping}/ReadClipper.java (99%) rename public/java/test/org/broadinstitute/sting/utils/{clipreads/ClipReadsTestUtils.java => clipping/ReadClipperTestUtils.java} (98%) rename public/java/test/org/broadinstitute/sting/utils/{clipreads => clipping}/ReadClipperUnitTest.java (94%) delete mode 100644 public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index d1148cbd5..ff3867120 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -38,9 +38,9 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.clipreads.ClippingOp; -import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation; -import org.broadinstitute.sting.utils.clipreads.ReadClipper; +import org.broadinstitute.sting.utils.clipping.ClippingOp; +import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; +import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java rename to public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index 0bb8c3e6f..f440eda5d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import com.google.java.contract.Requires; import net.sf.samtools.Cigar; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingRepresentation.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java rename to public/java/src/org/broadinstitute/sting/utils/clipping/ClippingRepresentation.java index d574ba2f0..c9d8730d1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingRepresentation.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; /** * How should we represent a clipped bases in a read? diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java rename to public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index e6b3ce929..6e3f37980 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import com.google.java.contract.Requires; import net.sf.samtools.CigarElement; diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java similarity index 98% rename from public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java rename to public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java index 048d9d8e0..18108e0a1 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; @@ -8,7 +8,9 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; -import java.util.*; +import java.util.LinkedList; +import java.util.List; +import java.util.Stack; /** * Created by IntelliJ IDEA. @@ -17,7 +19,7 @@ import java.util.*; * Time: 6:45 AM * To change this template use File | Settings | File Templates. */ -public class ClipReadsTestUtils { +public class ReadClipperTestUtils { //Should contain all the utils needed for tests to mass produce //reads, cigars, and other needed classes diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java similarity index 94% rename from public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index fcde5f360..3b4b0abce 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; @@ -50,13 +50,13 @@ public class ReadClipperUnitTest extends BaseTest { @BeforeClass public void init() { - cigarList = ClipReadsTestUtils.generateCigarList(maximumCigarSize); + cigarList = ReadClipperTestUtils.generateCigarList(maximumCigarSize); } @Test(enabled = true) public void testHardClipBothEndsByReferenceCoordinates() { for (Cigar cigar : cigarList) { - GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); int readLength = alnStart - alnEnd; @@ -71,7 +71,7 @@ public class ReadClipperUnitTest extends BaseTest { @Test(enabled = true) public void testHardClipByReadCoordinates() { for (Cigar cigar : cigarList) { - GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int readLength = read.getReadLength(); for (int i=0; i %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java deleted file mode 100644 index dd674ff1c..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.broadinstitute.sting.utils.clipreads; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - - -/** - * Created by IntelliJ IDEA. - * User: roger - * Date: 11/27/11 - * Time: 5:17 AM - * To change this template use File | Settings | File Templates. - */ -public class ClippingOpUnitTest extends BaseTest { - - ClippingOp clippingOp; - GATKSAMRecord read; - - @BeforeTest - public void init() { - read = ClipReadsTestUtils.makeRead(); - } - - @Test - private void testHardClip() { -// List testList = new LinkedList(); -// testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start -// testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end -// testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start -// testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end -// testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start -// testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end -// -// for (TestParameter p : testList) { -// init(); -// clippingOp = new ClippingOp(p.inputStart, p.inputStop); -// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar); -// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read), -// ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), -// ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), -// p.cigar); -// } - - } - - @Test - private void testSoftClip() { -// List testList = new LinkedList(); -// testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start -// testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end -// testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start -// testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end -// testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start -// testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end -// -// for (TestParameter p : testList) { -// init(); -// clippingOp = new ClippingOp(p.inputStart, p.inputStop); -// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar); -// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read), -// ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar); -// } - - } -} From 3069a689fe7dcca14200a6ed1ba1fdff3a31dbf8 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 19 Dec 2011 10:04:33 -0500 Subject: [PATCH 4/8] Bug fix: if there are multiple records at a given position, it turns out that SelectVariants would drop all variants that follow after one that fails filters (instead of dropping just the failing one). Added an integration test to cover this case. --- .../walkers/variantutils/SelectVariants.java | 20 +++++++++++-------- .../SelectVariantsIntegrationTest.java | 13 ++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 0af351750..6d94ffe6d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -493,12 +493,12 @@ public class SelectVariants extends RodWalker implements TreeR if (DISCORDANCE_ONLY) { Collection compVCs = tracker.getValues(discordanceTrack, context.getLocation()); if (!isDiscordant(vc, compVCs)) - return 0; + continue; } if (CONCORDANCE_ONLY) { Collection compVCs = tracker.getValues(concordanceTrack, context.getLocation()); if (!isConcordant(vc, compVCs)) - return 0; + continue; } if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic()) @@ -512,16 +512,20 @@ public class SelectVariants extends RodWalker implements TreeR VariantContext sub = subsetRecord(vc, samples); if ( (sub.isPolymorphicInSamples() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) { + boolean failedJexlMatch = false; for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) { if ( !VariantContextUtils.match(sub, jexl) ) { - return 0; + failedJexlMatch = true; + break; } } - if (SELECT_RANDOM_NUMBER) { - randomlyAddVariant(++variantNumber, sub); - } - else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { - vcfWriter.add(sub); + if ( !failedJexlMatch ) { + if (SELECT_RANDOM_NUMBER) { + randomlyAddVariant(++variantNumber, sub); + } + else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { + vcfWriter.add(sub); + } } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 72a07bd0e..042de2a27 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -116,6 +116,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testUsingDbsnpName--" + testFile, spec); } + @Test + public void testMultipleRecordsAtOnePosition() { + String testFile = validationDataLocation + "selectVariants.onePosition.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s -NO_HEADER", + 1, + Arrays.asList("20b52c96f5c48258494d072752b53693") + ); + + executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec); + } + @Test public void testParallelization() { String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; From 5383c506540ca58827d5d798d3390780d5e57512 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 19 Dec 2011 10:08:38 -0500 Subject: [PATCH 5/8] Protect ourselves when iteration is present but there's only a single iteration in queueJobReport.R --- .../broadinstitute/sting/queue/util/queueJobReport.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R index aff783b8e..d5ee3626f 100644 --- a/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R +++ b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R @@ -12,7 +12,7 @@ if ( onCMDLine ) { inputFileName = args[1] outputPDF = args[2] } else { - inputFileName = "Q-8271@gsa2.jobreport.txt" + inputFileName = "Q-26618@gsa4.jobreport.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt" outputPDF = NA @@ -129,9 +129,11 @@ plotGroup <- function(groupTable) { # as above, but averaging over all iterations groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") if ( dim(sub)[1] > 1 ) { - sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) - textplot(as.data.frame(sum), show.rownames=F) - title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + try({ # need a try here because we will fail to reduce when there's just a single iteration + sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) + textplot(as.data.frame(sum), show.rownames=F) + title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + }, silent=T) } } @@ -193,6 +195,7 @@ plotJobsGantt(gatkReportData, F, F) plotProgressByTime(gatkReportData) plotTimeByHost(gatkReportData) for ( group in gatkReportData ) { + print(group) plotGroup(group) } From 5fd19ae734b8cc94ee276c8909b02b2e47c787a1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 19 Dec 2011 10:19:00 -0500 Subject: [PATCH 6/8] Commented exactly how the results are represented from the exact model so developers can know how to use them. --- .../genotyper/AlleleFrequencyCalculationResult.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java index cc72fde11..ed80dce0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java @@ -34,10 +34,17 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; */ public class AlleleFrequencyCalculationResult { - // note that the cell at position zero in the likelihoods/posteriors array is actually probability of non-ref (since it's marginalized over all alleles) + // IMPORTANT NOTE: + // These 2 arrays are intended to contain the likelihoods/posterior probabilities for each alternate allele over each possible frequency (from 0 to 2N). + // For any given alternate allele and frequency, the likelihoods are marginalized over values for all other alternate alleles. What this means is that + // the likelihoods at cell index zero (AF=0) in the array is actually that of the site's being polymorphic (because although this alternate allele may + // be at AF=0, it is marginalized over all other alternate alleles which are not necessarily at AF=0). + // In the bi-allelic case (where there are no other alternate alleles over which to marginalize), + // the value at cell index zero will be equal to AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED. final double[][] log10AlleleFrequencyLikelihoods; final double[][] log10AlleleFrequencyPosteriors; + // These 2 variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles) double log10LikelihoodOfAFzero = 0.0; double log10PosteriorOfAFzero = 0.0; From 16cc2b864e3098e96eaff5727a5583775e392442 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Mon, 19 Dec 2011 13:41:47 +0100 Subject: [PATCH 7/8] - Corrected bug causing cases where both parents are HET to be accounted twice in the TDT calculation - Adapted TDT Integration test to corrected version of TDT Signed-off-by: Ryan Poplin --- .../annotator/TransmissionDisequilibriumTest.java | 15 +++++++-------- .../VariantAnnotatorIntegrationTest.java | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index 6cc8923e8..ecdde1e4f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -63,27 +63,26 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT private double calculateTDT( final VariantContext vc, final Set triosToTest ) { - final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM); - final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM); + final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM) + calculateNChildren(vc, triosToTest, HET, HOM, HET); + final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM) + calculateNChildren(vc, triosToTest, HOM, HOM, HET); final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET); final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET); - final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET); - final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET); + final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET) + calculateNChildren(vc, triosToTest, REF, HET, REF); + final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET) + calculateNChildren(vc, triosToTest, HET, HET, REF); final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); return (numer * numer) / denom; } - private double calculateNChildren( final VariantContext vc, final Set triosToTest, final int childIdx, final int momIdx, final int dadIdx ) { - final double likelihoodVector[] = new double[triosToTest.size() * 2]; + private double calculateNChildren( final VariantContext vc, final Set triosToTest, final int childIdx, final int parent1Idx, final int parent2Idx ) { + final double likelihoodVector[] = new double[triosToTest.size()]; int iii = 0; for( final Sample child : triosToTest ) { final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector(); final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector(); final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector(); - likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; - likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx]; + likelihoodVector[iii++] = momGL[parent1Idx] + dadGL[parent2Idx] + childGL[childIdx]; } return MathUtils.sumLog10(likelihoodVector); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index ffb9aedcc..245fda3c7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testTDTAnnotation() { - final String MD5 = "9fe37b61aab695ad47ce3c587148e91f"; + final String MD5 = "204e67536a17af7eaa6bf0a910818997"; WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" + " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,