From 550fb498beb48681b75b98e59aef5e6d7ead4385 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 14 Dec 2011 18:43:44 -0500 Subject: [PATCH 02/36] Support for NT testing (default up to 4) for CC and UG -- Added convenience function addJobReportBinding to just new binding to the map (x -> y) as well --- .../src/org/broadinstitute/sting/queue/util/QJobReport.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala index bb14bb6e6..73d1c028a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala @@ -81,6 +81,10 @@ trait QJobReport extends Logging { this.reportFeatures = features.mapValues(_.toString) } + def addJobReportBinding(key: String, value: Any) { + this.reportFeatures += (key -> value.toString) + } + // copy the QJobReport information -- todo : what's the best way to do this? override def copySettingsTo(function: QFunction) { self.copySettingsTo(function) From fb1c9d2abcdea57ba25440e4c620001210ede257 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 16 Dec 2011 09:05:28 -0500 Subject: [PATCH 06/36] Restored serial version of reader initialization. Parallel mode is default. -- Serial version can be re-enabled with a static boolean, if we decide to return to the serial version --- .../gatk/datasources/reads/SAMDataSource.java | 137 +++++++++++------- 1 file changed, 83 insertions(+), 54 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index d5d8ab2ef..e3b469b3d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -63,6 +63,7 @@ import java.util.concurrent.*; */ public class SAMDataSource { final private static GATKSamRecordFactory factory = new GATKSamRecordFactory(); + final private static boolean USE_PARALLEL_LOADING = true; /** Backing support for reads. */ protected final ReadProperties readProperties; @@ -714,68 +715,96 @@ public class SAMDataSource { * @param validationStringency validation stringency. */ public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) { - final int N_THREADS = 8; - int totalNumberOfFiles = readerIDs.size(); + final int totalNumberOfFiles = readerIDs.size(); int readerNumber = 1; + final SimpleTimer timer = new SimpleTimer().start(); - ExecutorService executor = Executors.newFixedThreadPool(N_THREADS); - final List inits = new ArrayList(totalNumberOfFiles); - Queue> futures = new LinkedList>(); - for (SAMReaderID readerID: readerIDs) { - logger.debug("Enqueuing for initialization: " + readerID.samFile); - final ReaderInitializer init = new ReaderInitializer(readerID); - inits.add(init); - futures.add(executor.submit(init)); - } - - final SimpleTimer timer = new SimpleTimer(); - try { - final int MAX_WAIT = 30 * 1000; - final int MIN_WAIT = 1 * 1000; - - timer.start(); - while ( ! futures.isEmpty() ) { - final int prevSize = futures.size(); - final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file - final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT)); - Thread.sleep(waitTimeInMS); - - Queue> pending = new LinkedList>(); - for ( final Future initFuture : futures ) { - if ( initFuture.isDone() ) { - final ReaderInitializer init = initFuture.get(); - if (threadAllocation.getNumIOThreads() > 0) { - inputStreams.put(init.readerID, init.blockInputStream); // get from initializer - } - logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID)); - readers.put(init.readerID, init.reader); - } else { - pending.add(initFuture); - } + logger.info("Initializing SAMRecords " + (USE_PARALLEL_LOADING ? "in parallel" : "in serial")); + if ( ! USE_PARALLEL_LOADING ) { + final int tickSize = 10; + int nExecutedTotal = 0; + long lastTick = timer.currentTime(); + for(final SAMReaderID readerID: readerIDs) { + final ReaderInitializer init = new ReaderInitializer(readerID).call(); + if (threadAllocation.getNumIOThreads() > 0) { + inputStreams.put(init.readerID, init.blockInputStream); // get from initializer } - final int pendingSize = pending.size(); - final int nExecutedInTick = prevSize - pendingSize; - final int nExecutedTotal = totalNumberOfFiles - pendingSize; - final double totalTimeInSeconds = timer.getElapsedTime(); - final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds); - final int nRemaining = pendingSize; - final double estTimeToComplete = pendingSize / nTasksPerSecond; - logger.info(String.format("Init %d BAMs in last %d s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m", - nExecutedInTick, (int)(waitTimeInMS / 1000.0), - nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond, - nRemaining, estTimeToComplete, estTimeToComplete / 60)); - - futures = pending; + logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile)); + readers.put(init.readerID,init.reader); + if ( nExecutedTotal++ % tickSize == 0) { + int tickInMS = (int)((timer.currentTime() - lastTick) / 1000.0); + printReaderPerformance(nExecutedTotal, tickSize, totalNumberOfFiles, timer, tickInMS); + } } - } catch ( InterruptedException e ) { - throw new ReviewedStingException("Interrupted SAMReader initialization", e); - } catch ( ExecutionException e ) { - throw new ReviewedStingException("Execution exception during SAMReader initialization", e); + } else { + final int N_THREADS = 8; + + final ExecutorService executor = Executors.newFixedThreadPool(N_THREADS); + final List inits = new ArrayList(totalNumberOfFiles); + Queue> futures = new LinkedList>(); + for (final SAMReaderID readerID: readerIDs) { + logger.debug("Enqueuing for initialization: " + readerID.samFile); + final ReaderInitializer init = new ReaderInitializer(readerID); + inits.add(init); + futures.add(executor.submit(init)); + } + + try { + final int MAX_WAIT = 30 * 1000; + final int MIN_WAIT = 1 * 1000; + + while ( ! futures.isEmpty() ) { + final int prevSize = futures.size(); + final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file + final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT)); + Thread.sleep(waitTimeInMS); + + Queue> pending = new LinkedList>(); + for ( final Future initFuture : futures ) { + if ( initFuture.isDone() ) { + final ReaderInitializer init = initFuture.get(); + if (threadAllocation.getNumIOThreads() > 0) { + inputStreams.put(init.readerID, init.blockInputStream); // get from initializer + } + logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID)); + readers.put(init.readerID, init.reader); + } else { + pending.add(initFuture); + } + } + + final int nExecutedTotal = totalNumberOfFiles - pending.size(); + final int nExecutedInTick = prevSize - pending.size(); + printReaderPerformance(nExecutedTotal, nExecutedInTick, totalNumberOfFiles, timer, (int)(waitTimeInMS / 1000)); + futures = pending; + } + } catch ( InterruptedException e ) { + throw new ReviewedStingException("Interrupted SAMReader initialization", e); + } catch ( ExecutionException e ) { + throw new ReviewedStingException("Execution exception during SAMReader initialization", e); + } + + executor.shutdown(); } logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime())); - executor.shutdown(); + } + + final private void printReaderPerformance(final int nExecutedTotal, + final int nExecutedInTick, + final int totalNumberOfFiles, + final SimpleTimer timer, final int tickDuration) { + final int pendingSize = totalNumberOfFiles - nExecutedTotal; + final double totalTimeInSeconds = timer.getElapsedTime(); + final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds); + final int nRemaining = pendingSize; + final double estTimeToComplete = pendingSize / nTasksPerSecond; + logger.info(String.format("Init %d BAMs in last %d s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m", + nExecutedInTick, tickDuration, + nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond, + nRemaining, estTimeToComplete, estTimeToComplete / 60)); + } /** From 3414ecfe2e07934539fbc194fe95ee8a879ab757 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 16 Dec 2011 09:07:40 -0500 Subject: [PATCH 07/36] Restored serial version of reader initialization. Serial mode is default, as the performance gains aren't so huge. -- Serial version can be re-enabled with a static boolean, if we decide to return to the serial version -- Comparison of serial and parallel reader with cached and uncached files: Initialization time: serial with 500 fully cached BAMs: 8.20 seconds Initialization time: serial with 500 uncached BAMs : 197.02 seconds Initialization time: parallel with 500 fully cached BAMs: 30.12 seconds Initialization time: parallel with 500 uncached BAMs : 75.47 seconds --- .../gatk/datasources/reads/SAMDataSource.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index e3b469b3d..d1a1cc71b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -63,7 +63,9 @@ import java.util.concurrent.*; */ public class SAMDataSource { final private static GATKSamRecordFactory factory = new GATKSamRecordFactory(); - final private static boolean USE_PARALLEL_LOADING = true; + + /** If true, we will load SAMReaders in parallel */ + final private static boolean USE_PARALLEL_LOADING = false; /** Backing support for reads. */ protected final ReadProperties readProperties; @@ -721,7 +723,7 @@ public class SAMDataSource { logger.info("Initializing SAMRecords " + (USE_PARALLEL_LOADING ? "in parallel" : "in serial")); if ( ! USE_PARALLEL_LOADING ) { - final int tickSize = 10; + final int tickSize = 50; int nExecutedTotal = 0; long lastTick = timer.currentTime(); for(final SAMReaderID readerID: readerIDs) { @@ -733,8 +735,9 @@ public class SAMDataSource { logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile)); readers.put(init.readerID,init.reader); if ( nExecutedTotal++ % tickSize == 0) { - int tickInMS = (int)((timer.currentTime() - lastTick) / 1000.0); - printReaderPerformance(nExecutedTotal, tickSize, totalNumberOfFiles, timer, tickInMS); + double tickInSec = (timer.currentTime() - lastTick) / 1000.0; + printReaderPerformance(nExecutedTotal, tickSize, totalNumberOfFiles, timer, tickInSec); + lastTick = timer.currentTime(); } } } else { @@ -776,7 +779,7 @@ public class SAMDataSource { final int nExecutedTotal = totalNumberOfFiles - pending.size(); final int nExecutedInTick = prevSize - pending.size(); - printReaderPerformance(nExecutedTotal, nExecutedInTick, totalNumberOfFiles, timer, (int)(waitTimeInMS / 1000)); + printReaderPerformance(nExecutedTotal, nExecutedInTick, totalNumberOfFiles, timer, waitTimeInMS / 1000.0); futures = pending; } } catch ( InterruptedException e ) { @@ -794,14 +797,15 @@ public class SAMDataSource { final private void printReaderPerformance(final int nExecutedTotal, final int nExecutedInTick, final int totalNumberOfFiles, - final SimpleTimer timer, final int tickDuration) { + final SimpleTimer timer, + final double tickDurationInSec) { final int pendingSize = totalNumberOfFiles - nExecutedTotal; final double totalTimeInSeconds = timer.getElapsedTime(); final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds); final int nRemaining = pendingSize; final double estTimeToComplete = pendingSize / nTasksPerSecond; - logger.info(String.format("Init %d BAMs in last %d s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m", - nExecutedInTick, tickDuration, + logger.info(String.format("Init %d BAMs in last %.2f s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m", + nExecutedInTick, tickDurationInSec, nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond, nRemaining, estTimeToComplete, estTimeToComplete / 60)); From 5aa79dacfc63895ab7ab96d1b17ea43b35230f37 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Fri, 16 Dec 2011 10:29:20 -0500 Subject: [PATCH 08/36] Changing hidden optimization argument to advanced. --- .../gatk/walkers/variantrecalibration/VariantRecalibrator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 339b8e0e6..7cc5b1625 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -175,7 +175,7 @@ public class VariantRecalibrator extends RodWalker Date: Fri, 16 Dec 2011 11:45:40 -0500 Subject: [PATCH 09/36] Minor bug fix for printing in SAMDataSource --- .../sting/gatk/datasources/reads/SAMDataSource.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index a021aedd0..8d0df407e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -740,7 +740,7 @@ public class SAMDataSource { logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile)); readers.put(init.readerID,init.reader); - if ( nExecutedTotal++ % tickSize == 0) { + if ( ++nExecutedTotal % tickSize == 0) { double tickInSec = (timer.currentTime() - lastTick) / 1000.0; printReaderPerformance(nExecutedTotal, tickSize, totalNumberOfFiles, timer, tickInSec); lastTick = timer.currentTime(); From d6d2f49c889d1b8323907e1d0aeaa0a6f430e48c Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 16 Dec 2011 16:09:37 -0500 Subject: [PATCH 11/36] Don't print log if there are no BAMs --- .../sting/gatk/datasources/reads/SAMDataSource.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 8d0df407e..75a851d65 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -727,7 +727,7 @@ public class SAMDataSource { int readerNumber = 1; final SimpleTimer timer = new SimpleTimer().start(); - logger.info("Initializing SAMRecords " + (USE_PARALLEL_LOADING ? "in parallel" : "in serial")); + if ( totalNumberOfFiles > 0 ) logger.info("Initializing SAMRecords " + (USE_PARALLEL_LOADING ? "in parallel" : "in serial")); if ( ! USE_PARALLEL_LOADING ) { final int tickSize = 50; int nExecutedTotal = 0; @@ -797,7 +797,7 @@ public class SAMDataSource { executor.shutdown(); } - logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime())); + if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime())); } final private void printReaderPerformance(final int nExecutedTotal, From b6067be9526db68c36d497f1f4647916ce04bb2f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 16 Dec 2011 16:10:18 -0500 Subject: [PATCH 12/36] Support for selecting only variants with specific IDs from a file in SelectVariants -- Cleaned up unused variables as well --- .../walkers/variantutils/SelectVariants.java | 87 ++++++++++--------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index d20fb54aa..0af351750 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MendelianViolation; +import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; @@ -42,6 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import java.io.File; +import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.*; @@ -140,7 +142,7 @@ import java.util.*; * -R ref.fasta \ * -T SelectVariants \ * --variant input.vcf \ - * -family NA12891+NA12892=NA12878 \ + * -bed family.ped \ * -mvq 50 \ * -o violations.vcf * @@ -250,16 +252,6 @@ public class SelectVariants extends RodWalker implements TreeR @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false) private boolean KEEP_ORIGINAL_CHR_COUNTS = false; - @Hidden - @Argument(fullName="family_structure_file", shortName="familyFile", doc="use -family unless you know what you're doing", required=false) - private File FAMILY_STRUCTURE_FILE = null; - - /** - * String formatted as dad+mom=child where these parameters determine which sample names are examined. - */ - @Argument(fullName="family_structure", shortName="family", doc="string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) - private String FAMILY_STRUCTURE = ""; - /** * This activates the mendelian violation module that will select all variants that correspond to a mendelian violation following the rules given by the family structure. */ @@ -286,13 +278,21 @@ public class SelectVariants extends RodWalker implements TreeR private double fractionGenotypes = 0; /** - * This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria. + * This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria. * When specified one or more times, a particular type of variant is selected. * - */ + */ @Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false) private List TYPES_TO_INCLUDE = new ArrayList(); + /** + * If provided, we will only include variants whose ID field is present in this list of ids. The matching + * is exact string matching. The file format is just one ID per line + * + */ + @Argument(fullName="keepIDs", shortName="IDs", doc="Only emit sites whose ID is found in this file (one ID per line)", required=false) + private File rsIDFile = null; + @Hidden @Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false) @@ -313,9 +313,9 @@ public class SelectVariants extends RodWalker implements TreeR } public enum NumberAlleleRestriction { - ALL, - BIALLELIC, - MULTIALLELIC + ALL, + BIALLELIC, + MULTIALLELIC } private ArrayList selectedTypes = new ArrayList(); @@ -339,17 +339,13 @@ public class SelectVariants extends RodWalker implements TreeR private int positionToAdd = 0; private RandomVariantStructure [] variantArray; - - /* Variables used for random selection with AF boosting */ - private ArrayList afBreakpoints = null; - private ArrayList afBoosts = null; - double bkDelta = 0.0; - private PrintStream outMVFileStream = null; - //Random number generator for the genotypes to remove + //Random number generator for the genotypes to remove private Random randomGenotypes = new Random(); + private Set IDsToKeep = null; + /** * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */ @@ -437,7 +433,18 @@ public class SelectVariants extends RodWalker implements TreeR if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + 100.0*fractionRandom + "% of the variants at random from the variant track"); - + /** load in the IDs file to a hashset for matching */ + if ( rsIDFile != null ) { + IDsToKeep = new HashSet(); + try { + for ( final String line : new XReadLines(rsIDFile).readLines() ) { + IDsToKeep.add(line.trim()); + } + logger.info("Selecting only variants with one of " + IDsToKeep.size() + " IDs from " + rsIDFile); + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(rsIDFile, e); + } + } } /** @@ -460,20 +467,23 @@ public class SelectVariants extends RodWalker implements TreeR } for (VariantContext vc : vcs) { + if ( IDsToKeep != null && ! IDsToKeep.contains(vc.getID()) ) + continue; + if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1) - break; + break; if (outMVFile != null){ for( String familyId : mv.getViolationFamilies()){ for(Sample sample : this.getSampleDB().getFamily(familyId)){ if(sample.getParents().size() > 0){ - outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + - "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), - vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)), - sample.getMaternalID(), sample.getPaternalID(), sample.getID(), - vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(), - vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(), - vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() ); + outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + + "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), + vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)), + sample.getMaternalID(), sample.getPaternalID(), sample.getID(), + vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(), + vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(), + vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() ); } } @@ -513,10 +523,7 @@ public class SelectVariants extends RodWalker implements TreeR else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { vcfWriter.add(sub); } - - } - } return 1; @@ -647,7 +654,7 @@ public class SelectVariants extends RodWalker implements TreeR // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate) if ( vc.getAlleles().size() != sub.getAlleles().size() ) - newGC = VariantContextUtils.stripPLs(sub.getGenotypes()); + newGC = VariantContextUtils.stripPLs(sub.getGenotypes()); //Remove a fraction of the genotypes if needed if(fractionGenotypes>0){ @@ -655,10 +662,10 @@ public class SelectVariants extends RodWalker implements TreeR for ( Genotype genotype : newGC ) { //Set genotype to no call if it falls in the fraction. if(fractionGenotypes>0 && randomGenotypes.nextDouble() alleles = new ArrayList(2); - alleles.add(Allele.create((byte)'.')); - alleles.add(Allele.create((byte)'.')); - genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap(),false)); + ArrayList alleles = new ArrayList(2); + alleles.add(Allele.create((byte)'.')); + alleles.add(Allele.create((byte)'.')); + genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap(),false)); } else{ genotypes.add(genotype); From 1994c3e3bca05593d05693dd41de7bf30b21cbec Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 16 Dec 2011 16:33:13 -0500 Subject: [PATCH 14/36] Only print warning about allele incompatibility when running there are genotypes in the file in CombineVariants --- .../sting/utils/variantcontext/VariantContextUtils.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 91a018c4e..c9a4965c1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -619,8 +619,9 @@ public class VariantContextUtils { if (vc.alleles.size() == 1) continue; if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) { - logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s", - genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles)); + if ( ! genotypes.isEmpty() ) + logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s", + genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles)); genotypes = stripPLs(genotypes); // this will remove stale AC,AF attributed from vc calculateChromosomeCounts(vc, attributes, true); From c26295919e6cfb4903c11ea0646d2c93039de4d9 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 16 Dec 2011 14:36:27 -0500 Subject: [PATCH 15/36] Added hardClipBothEndsByReferenceCoordinates UnitTest for the ReadClipper --- .../utils/clipreads/ReadClipperUnitTest.java | 47 ++++++++++--------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index fc1459ee0..2e2b5f373 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -34,7 +34,6 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.testng.annotations.BeforeClass; -import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; import java.util.List; @@ -48,10 +47,6 @@ import java.util.List; */ public class ReadClipperUnitTest extends BaseTest { - // TODO: exception testing, make cases that should fail will fail - - // TODO: add indels to all test cases - List cigarList; int maximumCigarSize = 10; @@ -64,6 +59,18 @@ public class ReadClipperUnitTest extends BaseTest { public void testHardClipBothEndsByReferenceCoordinates() { logger.warn("Executing testHardClipBothEndsByReferenceCoordinates"); + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + int alnStart = read.getAlignmentStart(); + int alnEnd = read.getAlignmentEnd(); + int readLength = alnStart - alnEnd; + for (int i=0; i= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); + Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); + } + } + } @Test(enabled = true) @@ -116,7 +123,7 @@ public class ReadClipperUnitTest extends BaseTest { // Tests // Make sure the low qualities are gone - testNoLowQualBases(clipLeft, LOW_QUAL); + assertNoLowQualBases(clipLeft, LOW_QUAL); // Can't run this test with the current contract of no hanging insertions //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); @@ -131,7 +138,7 @@ public class ReadClipperUnitTest extends BaseTest { // Tests // Make sure the low qualities are gone - testNoLowQualBases(clipRight, LOW_QUAL); + assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure we haven't clipped any high quals -- Can't run this test with the current contract of no hanging insertions //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString())); @@ -149,7 +156,7 @@ public class ReadClipperUnitTest extends BaseTest { // Tests // Make sure the low qualities are gone - testNoLowQualBases(clipBoth, LOW_QUAL); + assertNoLowQualBases(clipBoth, LOW_QUAL); // Can't run this test with the current contract of no hanging insertions //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2*nLowQualBases), read.getCigarString(), clipBoth.getCigarString())); @@ -158,11 +165,7 @@ public class ReadClipperUnitTest extends BaseTest { // logger.warn(String.format("Testing %s for all combinations of low/high qual... PASSED", read.getCigarString())); } - - - - - // ONE OFF Testing clipping that ends inside an insertion + // ONE OFF Testing clipping that ends inside an insertion ( Ryan's bug ) final byte[] BASES = {'A','C','G','T','A','C','G','T'}; final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2}; final String CIGAR = "1S1M5I1S"; @@ -179,14 +182,6 @@ public class ReadClipperUnitTest extends BaseTest { ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected); } - private void testNoLowQualBases(GATKSAMRecord read, byte low_qual) { - if (!read.isEmpty()) { - byte [] quals = read.getBaseQualities(); - for (int i=0; i %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString())); } } + + private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) { + if (!read.isEmpty()) { + byte [] quals = read.getBaseQualities(); + for (int i=0; i Date: Fri, 16 Dec 2011 15:10:22 -0500 Subject: [PATCH 16/36] Added hardClipByReadCoordinates UnitTest for the ReadClipper --- .../utils/clipreads/ReadClipperUnitTest.java | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index 2e2b5f373..d93e0ac08 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -39,11 +39,8 @@ import org.testng.annotations.Test; import java.util.List; /** - * Created by IntelliJ IDEA. * User: roger * Date: 9/28/11 - * Time: 9:54 PM - * To change this template use File | Settings | File Templates. */ public class ReadClipperUnitTest extends BaseTest { @@ -57,8 +54,6 @@ public class ReadClipperUnitTest extends BaseTest { @Test(enabled = true) public void testHardClipBothEndsByReferenceCoordinates() { - logger.warn("Executing testHardClipBothEndsByReferenceCoordinates"); - for (Cigar cigar : cigarList) { GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); int alnStart = read.getAlignmentStart(); @@ -70,18 +65,28 @@ public class ReadClipperUnitTest extends BaseTest { Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); } } - + logger.warn("PASSED"); } @Test(enabled = true) public void testHardClipByReadCoordinates() { - logger.warn("Executing testHardClipByReadCoordinates"); + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + int readLength = read.getReadLength(); + for (int i=0; i %s", i, read.getCigarString(), clipLeft.getCigarString())); + GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReadCoordinates(i, readLength-1); + Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString())); + } + } + logger.warn("PASSED"); } @Test(enabled = true) public void testHardClipByReferenceCoordinates() { - logger.warn("Executing testHardClipByReferenceCoordinates"); + logger.warn("PASSED"); } @@ -93,15 +98,12 @@ public class ReadClipperUnitTest extends BaseTest { @Test(enabled = true) public void testHardClipByReferenceCoordinatesRightTail() { - init(); - logger.warn("Executing testHardClipByReferenceCoordinatesRightTail"); + logger.warn("PASSED"); } @Test(enabled = true) public void testHardClipLowQualEnds() { - logger.warn("Executing testHardClipLowQualEnds"); - final byte LOW_QUAL = 2; final byte HIGH_QUAL = 30; @@ -180,6 +182,8 @@ public class ReadClipperUnitTest extends BaseTest { ReadClipper lowQualClipper = new ReadClipper(read); ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected); + + logger.warn("PASSED"); } @Test(enabled = true) @@ -233,6 +237,8 @@ public class ReadClipperUnitTest extends BaseTest { // logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString())); } + + logger.warn("PASSED"); } private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) { From 5bba44d6934b06f944728cd3aad5ef191373a369 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 16 Dec 2011 16:36:31 -0500 Subject: [PATCH 17/36] Added hardClipByReferenceCoordinates UnitTest for the ReadClipper * fixed edge case when requested to hard clip beginning of a read that had hanging soft clipped bases on the left tail. * fixed edge case when requested to hard clip end of a read that had hanging soft clipped bases on the right tail. * fixed AlignmentStart of a clipped read that results in only hard clips and soft clips note: added tests to all these beautiful cases... --- .../sting/utils/clipreads/ClippingOp.java | 7 ++++-- .../sting/utils/clipreads/ReadClipper.java | 9 ++++++++ .../utils/clipreads/ReadClipperUnitTest.java | 23 ++++++++++++++++--- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 06985041e..0bb8c3e6f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -460,17 +460,20 @@ public class ClippingOp { int newShift = 0; int oldShift = 0; + boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift for (CigarElement cigarElement : newCigar.getCigarElements()) { if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) newShift += cigarElement.getLength(); - else + else { + readHasStarted = true; break; + } } for (CigarElement cigarElement : oldCigar.getCigarElements()) { if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP ) oldShift += Math.min(cigarElement.getLength(), newShift - oldShift); - else + else if (readHasStarted) break; } return newShift - oldShift; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 4bb309a48..461c9aef3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -63,12 +63,18 @@ public class ReadClipper { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); + if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) + return new GATKSAMRecord(read.getHeader()); + if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); if ( start > stop ) throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); + if ( start > 0 && stop < read.getReadLength() - 1) + throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString())); + this.addOp(new ClippingOp(start, stop)); GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); this.ops = null; @@ -76,6 +82,9 @@ public class ReadClipper { } public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { + if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) + return new GATKSAMRecord(read.getHeader()); + this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index d93e0ac08..193eec661 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -86,14 +87,30 @@ public class ReadClipperUnitTest extends BaseTest { @Test(enabled = true) public void testHardClipByReferenceCoordinates() { - logger.warn("PASSED"); + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + int alnStart = read.getAlignmentStart(); + int alnEnd = read.getAlignmentEnd(); + for (int i=alnStart; i<=alnEnd; i++) { + if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side + GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i); + if (!clipLeft.isEmpty()) + Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); + } + if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side + GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd); + if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. + Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); + } + } + } + logger.warn("PASSED"); } @Test(enabled = true) public void testHardClipByReferenceCoordinatesLeftTail() { - logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail"); - + logger.warn("PASSED"); } @Test(enabled = true) From 075be52adc2f544812270905bceb912da3b4961c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 16 Dec 2011 17:05:46 -0500 Subject: [PATCH 18/36] Added hardClipByReferenceCoordinates (left and right tails) UnitTest for the ReadClipper --- .../utils/clipreads/ReadClipperUnitTest.java | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index 193eec661..655b1e709 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -110,13 +110,36 @@ public class ReadClipperUnitTest extends BaseTest { @Test(enabled = true) public void testHardClipByReferenceCoordinatesLeftTail() { + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + int alnStart = read.getAlignmentStart(); + int alnEnd = read.getAlignmentEnd(); + if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side + for (int i=alnStart; i<=alnEnd; i++) { + GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i); + if (!clipLeft.isEmpty()) + Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); + } + } + } logger.warn("PASSED"); } @Test(enabled = true) public void testHardClipByReferenceCoordinatesRightTail() { + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + int alnStart = read.getAlignmentStart(); + int alnEnd = read.getAlignmentEnd(); + if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side + for (int i=alnStart; i<=alnEnd; i++) { + GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd); + if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. + Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); + } + } + } logger.warn("PASSED"); - } @Test(enabled = true) From fcc21180e8b3b9c7f54b011d2e8e2f1521a05805 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 16 Dec 2011 17:22:40 -0500 Subject: [PATCH 19/36] Added hardClipLeadingInsertions UnitTest for the ReadClipper fixed issue where a read starting with an insertion followed by a deletion would break, clipper can now safely clip the insertion and the deletion if that's the case. note: test is turned off until contract changes to allow hanging insertions (left/right). --- .../sting/utils/clipreads/ReadClipper.java | 8 +--- .../utils/clipreads/ClipReadsTestUtils.java | 12 +++++ .../utils/clipreads/ReadClipperUnitTest.java | 46 +++++++++++++++++-- 3 files changed, 57 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 461c9aef3..e6b3ce929 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -204,16 +204,12 @@ public class ReadClipper { for(CigarElement cigarElement : read.getCigar().getCigarElements()) { if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP && - cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION) + cigarElement.getOperator() != CigarOperator.INSERTION) break; - else if (cigarElement.getOperator() == CigarOperator.INSERTION) { + else if (cigarElement.getOperator() == CigarOperator.INSERTION) this.addOp(new ClippingOp(0, cigarElement.getLength() - 1)); - } - else if (cigarElement.getOperator() == CigarOperator.DELETION) { - throw new ReviewedStingException("No read should start with a deletion. Aligner bug?"); - } } return clipRead(ClippingRepresentation.HARDCLIP_BASES); } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java index c6dc3a833..048d9d8e0 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java @@ -176,4 +176,16 @@ public class ClipReadsTestUtils { Assert.assertEquals(actual.isEmpty(), expected.isEmpty()); } + public static Cigar invertCigar (Cigar cigar) { + Stack cigarStack = new Stack(); + for (CigarElement cigarElement : cigar.getCigarElements()) + cigarStack.push(cigarElement); + + Cigar invertedCigar = new Cigar(); + while (!cigarStack.isEmpty()) + invertedCigar.add(cigarStack.pop()); + + return invertedCigar; + } + } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index 655b1e709..249951139 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -46,7 +46,7 @@ import java.util.List; public class ReadClipperUnitTest extends BaseTest { List cigarList; - int maximumCigarSize = 10; + int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2 @BeforeClass public void init() { @@ -277,10 +277,31 @@ public class ReadClipperUnitTest extends BaseTest { // logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString())); } - - logger.warn("PASSED"); } + @Test(enabled = false) + public void testHardClipLeadingInsertions() { + for (Cigar cigar : cigarList) { + if (startsWithInsertion(cigar)) { + GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions(); + + int expectedLength = read.getReadLength() - leadingInsertionLength(read.getCigar()); + if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar())) + expectedLength -= leadingInsertionLength(ClipReadsTestUtils.invertCigar(read.getCigar())); + + if (! clippedRead.isEmpty()) { + Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there + Assert.assertFalse(startsWithInsertion(clippedRead.getCigar())); // check that the insertions are gone + } + else + Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength)); // check that the read was expected to be fully clipped + } + } + } + + + private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) { if (!read.isEmpty()) { byte [] quals = read.getBaseQualities(); @@ -289,5 +310,24 @@ public class ReadClipperUnitTest extends BaseTest { } } + private boolean startsWithInsertion(Cigar cigar) { + return leadingInsertionLength(cigar) > 0; + } + private int leadingInsertionLength(Cigar cigar) { + for (CigarElement cigarElement : cigar.getCigarElements()) { + if (cigarElement.getOperator() == CigarOperator.INSERTION) + return cigarElement.getLength(); + if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) + break; + } + return 0; + } + + private boolean cigarHasElementsDifferentThanInsertionsAndHardClips (Cigar cigar) { + for (CigarElement cigarElement : cigar.getCigarElements()) + if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP) + return true; + return false; + } } \ No newline at end of file From e5df9e06845bfddac3d1f5daddea7c89bc6a4efa Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 16 Dec 2011 17:08:18 -0500 Subject: [PATCH 20/36] cleaner test output cleaned up the debug "pass" messages in the unit tests --- .../sting/utils/clipreads/ReadClipperUnitTest.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index 249951139..fcde5f360 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -66,7 +66,6 @@ public class ReadClipperUnitTest extends BaseTest { Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); } } - logger.warn("PASSED"); } @Test(enabled = true) @@ -82,7 +81,6 @@ public class ReadClipperUnitTest extends BaseTest { Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString())); } } - logger.warn("PASSED"); } @Test(enabled = true) @@ -105,7 +103,6 @@ public class ReadClipperUnitTest extends BaseTest { } } } - logger.warn("PASSED"); } @Test(enabled = true) @@ -122,7 +119,6 @@ public class ReadClipperUnitTest extends BaseTest { } } } - logger.warn("PASSED"); } @Test(enabled = true) @@ -139,7 +135,6 @@ public class ReadClipperUnitTest extends BaseTest { } } } - logger.warn("PASSED"); } @Test(enabled = true) @@ -222,8 +217,6 @@ public class ReadClipperUnitTest extends BaseTest { ReadClipper lowQualClipper = new ReadClipper(read); ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected); - - logger.warn("PASSED"); } @Test(enabled = true) From 7486696c076f73c02a61baaf2262f6f1cdc91de7 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Fri, 16 Dec 2011 18:07:26 -0500 Subject: [PATCH 21/36] When using bam list mode in HSP deriving VCF name from bam list instead of requiring an additional parameter. Creating a single temporary directory per ant test run instead of a putting temp files across all runs in the same directory. Updated various tests for above items and other small fixes. --- .../sting/utils/R/RScriptExecutor.java | 2 +- .../sting/utils/io/IOUtils.java | 2 +- .../org/broadinstitute/sting/BaseTest.java | 28 +++--- .../drmaa/v1_0/JnaSessionIntegrationTest.java | 2 +- .../drmaa/v1_0/LibDrmaaIntegrationTest.java | 4 +- .../jna/lsf/v7_0_6/LibBatIntegrationTest.java | 2 +- .../qscripts/examples/ExampleCountLoci.scala | 2 +- .../qscripts/examples/ExampleCountReads.scala | 6 +- .../examples/ExampleCustomWalker.scala | 2 +- .../examples/ExampleUnifiedGenotyper.scala | 1 - .../sting/queue/QCommandLine.scala | 2 +- .../ExampleCountLociPipelineTest.scala | 2 +- .../ExampleCountReadsPipelineTest.scala | 66 +++++++++++++ .../ExampleUnifiedGenotyperPipelineTest.scala | 92 +++++++++++++++++++ 14 files changed, 187 insertions(+), 26 deletions(-) create mode 100644 public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala create mode 100644 public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java index d8176ff4e..d753da1c8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java @@ -109,7 +109,7 @@ public class RScriptExecutor { List tempFiles = new ArrayList(); try { - File tempLibDir = IOUtils.tempDir("R.", ".lib"); + File tempLibDir = IOUtils.tempDir("Rlib.", ""); tempFiles.add(tempLibDir); StringBuilder expression = new StringBuilder("tempLibDir = '").append(tempLibDir).append("';"); diff --git a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java index 6f0573b02..b3fdb93d3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java @@ -79,7 +79,7 @@ public class IOUtils { tempDirParent = FileUtils.getTempDirectory(); if (!tempDirParent.exists() && !tempDirParent.mkdirs()) throw new UserException.BadTmpDir("Could not create temp directory: " + tempDirParent); - File temp = File.createTempFile(prefix + "-", suffix, tempDirParent); + File temp = File.createTempFile(prefix, suffix, tempDirParent); if (!temp.delete()) throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath()); if (!temp.mkdir()) diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 54d34fcfa..0f59131f5 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -3,9 +3,11 @@ package org.broadinstitute.sting; import org.apache.commons.io.FileUtils; import org.apache.log4j.*; import org.apache.log4j.spi.LoggingEvent; +import org.apache.xmlbeans.impl.common.IOUtil; import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.io.IOUtils; import org.testng.Assert; import javax.swing.*; @@ -78,8 +80,8 @@ public abstract class BaseTest { public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list"; public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list"; - public static final String networkTempDir = "/broad/shptmp/" + System.getProperty("user.name") + "/"; - public static final File networkTempDirFile = new File(networkTempDir); + public static final String networkTempDir; + public static final File networkTempDirFile; public static final File testDirFile = new File("public/testdata/"); public static final String testDir = testDirFile.getAbsolutePath() + "/"; @@ -99,6 +101,10 @@ public abstract class BaseTest { // Set the Root logger to only output warnings. logger.setLevel(Level.WARN); + networkTempDirFile = IOUtils.tempDir("temp.", ".dir", new File("/broad/shptmp/" + System.getProperty("user.name"))); + networkTempDirFile.deleteOnExit(); + networkTempDir = networkTempDirFile.getAbsolutePath() + "/"; + // find our file sources // if (!fileExist(hg18Reference) || !fileExist(hg19Reference) || !fileExist(b36KGReference)) { // logger.fatal("We can't locate the reference directories. Aborting!"); @@ -233,18 +239,12 @@ public abstract class BaseTest { /** * Creates a temp file that will be deleted on exit after tests are complete. - * @param name Prefix of the file. - * @param extension Extension to concat to the end of the file. - * @return A file in the network temporary directory starting with name, ending with extension, which will be deleted after the program exits. + * @param name Name of the file. + * @return A file in the network temporary directory with name, which will be deleted after the program exits. */ - public static File createNetworkTempFile(String name, String extension) { - try { - FileUtils.forceMkdir(networkTempDirFile); - File file = File.createTempFile(name, extension, networkTempDirFile); - file.deleteOnExit(); - return file; - } catch (IOException ex) { - throw new ReviewedStingException("Cannot create temp file: " + ex.getMessage(), ex); - } + public static File createNetworkTempFile(String name) { + File file = new File(networkTempDirFile, name); + file.deleteOnExit(); + return file; } } diff --git a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java index 48f4c3777..f68a96d26 100644 --- a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java @@ -62,7 +62,7 @@ public class JnaSessionIntegrationTest extends BaseTest { return; } - File outFile = createNetworkTempFile("JnaSessionIntegrationTest-", ".out"); + File outFile = createNetworkTempFile("JnaSessionIntegrationTest.out"); Session session = factory.getSession(); session.init(null); try { diff --git a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java index d98281ad3..4c7d4ce06 100644 --- a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java @@ -86,7 +86,7 @@ public class LibDrmaaIntegrationTest extends BaseTest { @Test(dependsOnMethods = { "testDrmaa" }) public void testSubmitEcho() throws Exception { - if (implementation.indexOf("LSF") >= 0) { + if (implementation.contains("LSF")) { System.err.println(" *********************************************************"); System.err.println(" ***********************************************************"); System.err.println(" **** ****"); @@ -101,7 +101,7 @@ public class LibDrmaaIntegrationTest extends BaseTest { Memory error = new Memory(LibDrmaa.DRMAA_ERROR_STRING_BUFFER); int errnum; - File outFile = createNetworkTempFile("LibDrmaaIntegrationTest-", ".out"); + File outFile = createNetworkTempFile("LibDrmaaIntegrationTest.out"); errnum = LibDrmaa.drmaa_init(null, error, LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN); diff --git a/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java b/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java index b4fb5cfa3..21339eb46 100644 --- a/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java @@ -93,7 +93,7 @@ public class LibBatIntegrationTest extends BaseTest { @Test public void testSubmitEcho() throws Exception { String queue = "hour"; - File outFile = createNetworkTempFile("LibBatIntegrationTest-", ".out"); + File outFile = createNetworkTempFile("LibBatIntegrationTest.out"); submit req = new submit(); diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountLoci.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountLoci.scala index 4ca3cbb89..149376018 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountLoci.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountLoci.scala @@ -19,7 +19,7 @@ class ExampleCountLoci extends QScript { @Output var out: File = _ - def script = { + def script() { val countLoci = new CountLoci countLoci.reference_sequence = referenceFile countLoci.input_file = bamFiles diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala index 9fdd1ba4c..7f9d3f87a 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala @@ -24,7 +24,7 @@ class ExampleCountReads extends QScript { /** * In script, you create and then add() functions to the pipeline. */ - def script = { + def script() { // Run CountReads for all bams jointly. @@ -41,6 +41,9 @@ class ExampleCountReads extends QScript { // matches the full form of the argument, but will actually be a scala List[] jointCountReads.input_file = bamFiles + // Set the memory limit. Also acts as a memory request on LSF and GridEngine. + jointCountReads.memoryLimit = 1 + // Add the newly created function to the pipeline. add(jointCountReads) @@ -51,6 +54,7 @@ class ExampleCountReads extends QScript { singleCountReads.reference_sequence = referenceFile // ':+' is the scala List append operator singleCountReads.input_file :+= bamFile + singleCountReads.memoryLimit = 1 add(singleCountReads) } } diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCustomWalker.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCustomWalker.scala index d3796d350..d30668c19 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCustomWalker.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCustomWalker.scala @@ -24,7 +24,7 @@ class ExampleCustomWalker extends QScript { /** * In script, you create and then add() functions to the pipeline. */ - def script = { + def script() { val customWalker = new CommandLineGATK { // Set the name of your walker, for example this will be passed as -T MyCustomWalker this.analysis_type = "MyCustomWalker" diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala index 2f4aea755..8cb86db0b 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala @@ -33,7 +33,6 @@ class ExampleUnifiedGenotyper extends QScript { @Argument(doc="An optional list of filter expressions.", shortName="filterExpression", required=false) var filterExpressions: List[String] = Nil - // This trait allows us set the variables below in one place, // and then reuse this trait on each CommandLineGATK function below. trait UnifiedGenotyperArguments extends CommandLineGATK { diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index 913bd243c..32913deb4 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -89,7 +89,7 @@ class QCommandLine extends CommandLineProgram with Logging { private var shuttingDown = false private lazy val pluginManager = { - qScriptClasses = IOUtils.tempDir("Q-Classes", "", settings.qSettings.tempDirectory) + qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory) qScriptManager.loadScripts(scripts, qScriptClasses) new PluginManager[QScript](classOf[QScript], List(qScriptClasses.toURI.toURL)) } diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala index 5901cab46..f657e4be1 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala @@ -30,7 +30,7 @@ import org.broadinstitute.sting.BaseTest class ExampleCountLociPipelineTest { @Test - def testCountLoci { + def testCountLoci() { val testOut = "count.out" val spec = new PipelineTestSpec spec.name = "countloci" diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala new file mode 100644 index 000000000..d9147a0ed --- /dev/null +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.pipeline.examples + +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.testng.annotations.Test +import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.BaseTest + +class ExampleCountReadsPipelineTest { + @Test + def testCountReads() { + val spec = new PipelineTestSpec + spec.name = "countreads" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -I " + BaseTest.testDir + "exampleBAM.bam").mkString + PipelineTest.executeTest(spec) + } +} diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala new file mode 100644 index 000000000..858e1cce6 --- /dev/null +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.pipeline.examples + +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.testng.annotations.Test +import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.BaseTest + +class ExampleUnifiedGenotyperPipelineTest { + @Test + def testUnifiedGenotyper() { + val spec = new PipelineTestSpec + spec.name = "unifiedgenotyper" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -I " + BaseTest.testDir + "exampleBAM.bam", + " -filter QD", + " -filterExpression 'QD < 2.0'").mkString + PipelineTest.executeTest(spec) + } +} From 6059ca76e8939f77e3e7ebe940912f9c47181db8 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Fri, 16 Dec 2011 23:00:16 -0500 Subject: [PATCH 22/36] Removing cruft that snuck in last commit. --- .../org/broadinstitute/sting/BaseTest.java | 8 ---- .../ExampleCountReadsPipelineTest.scala | 24 ---------- .../ExampleUnifiedGenotyperPipelineTest.scala | 48 ------------------- 3 files changed, 80 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 0f59131f5..8e218f950 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -1,20 +1,12 @@ package org.broadinstitute.sting; -import org.apache.commons.io.FileUtils; import org.apache.log4j.*; import org.apache.log4j.spi.LoggingEvent; -import org.apache.xmlbeans.impl.common.IOUtil; import org.broadinstitute.sting.commandline.CommandLineUtils; -import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.io.IOUtils; -import org.testng.Assert; -import javax.swing.*; import java.io.*; -import java.math.BigInteger; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; import java.util.*; /** diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala index d9147a0ed..8b286f090 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala @@ -24,30 +24,6 @@ package org.broadinstitute.sting.queue.pipeline.examples -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - import org.testng.annotations.Test import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} import org.broadinstitute.sting.BaseTest diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala index 858e1cce6..d50673a1a 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala @@ -24,54 +24,6 @@ package org.broadinstitute.sting.queue.pipeline.examples -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - import org.testng.annotations.Test import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} import org.broadinstitute.sting.BaseTest From 6dc52d42bfbc8777dcc9307cb68463a461d811f7 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sun, 18 Dec 2011 00:01:42 -0500 Subject: [PATCH 23/36] Implemented the proper QUAL calculation for multi-allelic calls. Integration tests pass except for the ones making multi-allelic calls (duh) and one of the SLOD tests (which used to print 0 when one of the LODs was NaN but now we just don't print the SB annotation for that record). --- .../AlleleFrequencyCalculationResult.java | 4 + .../genotyper/ExactAFCalculationModel.java | 19 ++-- .../genotyper/UnifiedGenotyperEngine.java | 87 ++++++++++--------- .../UnifiedGenotyperIntegrationTest.java | 6 +- 4 files changed, 64 insertions(+), 52 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java index 66106f658..cc72fde11 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java @@ -34,9 +34,13 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; */ public class AlleleFrequencyCalculationResult { + // note that the cell at position zero in the likelihoods/posteriors array is actually probability of non-ref (since it's marginalized over all alleles) final double[][] log10AlleleFrequencyLikelihoods; final double[][] log10AlleleFrequencyPosteriors; + double log10LikelihoodOfAFzero = 0.0; + double log10PosteriorOfAFzero = 0.0; + AlleleFrequencyCalculationResult(int maxAltAlleles, int numChr) { log10AlleleFrequencyLikelihoods = new double[maxAltAlleles][numChr+1]; log10AlleleFrequencyPosteriors = new double[maxAltAlleles][numChr+1]; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index bc27916dd..aa743f86f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -407,14 +407,19 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { nonRefAlleles++; } - // update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs - for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { - int AC = set.ACcounts.getCounts()[i]; - result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK); + // for k=0, we don't want to put that value into the likelihoods/posteriors matrix, but instead want to set the value in the results object + if ( nonRefAlleles == 0 ) { + result.log10LikelihoodOfAFzero = log10LofK; + result.log10PosteriorOfAFzero = log10LofK + log10AlleleFrequencyPriors[0][0]; + } else { + // update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs + for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { + int AC = set.ACcounts.getCounts()[i]; + result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK); - // for k=0 we still want to use theta - final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC]; - result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior); + final double prior = log10AlleleFrequencyPriors[nonRefAlleles-1][AC]; + result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior); + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 7e5f388b2..533bf0c68 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -75,8 +75,9 @@ public class UnifiedGenotyperEngine { // the model used for calculating p(non-ref) private ThreadLocal afcm = new ThreadLocal(); - // the allele frequency likelihoods (allocated once as an optimization) + // the allele frequency likelihoods and posteriors (allocated once as an optimization) private ThreadLocal alleleFrequencyCalculationResult = new ThreadLocal(); + private ThreadLocal posteriorsArray = new ThreadLocal(); // because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything private final double[][] log10AlleleFrequencyPriorsSNPs; @@ -289,7 +290,9 @@ public class UnifiedGenotyperEngine { if ( afcm.get() == null ) { afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES, N)); + posteriorsArray.set(new double[N + 2]); } + AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get(); // don't try to genotype too many alternate alleles if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) { @@ -307,24 +310,20 @@ public class UnifiedGenotyperEngine { } // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); - afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); + clearAFarray(AFresult.log10AlleleFrequencyLikelihoods); + clearAFarray(AFresult.log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult); // is the most likely frequency conformation AC=0 for all alternate alleles? boolean bestGuessIsRef = true; - // which alternate allele has the highest MLE AC? - int indexOfHighestAlt = -1; - int alleleCountOfHighestAlt = -1; - // determine which alternate alleles have AF>0 boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()]; for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { - int indexOfBestAC = MathUtils.maxElementIndex(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]); + int indexOfBestAC = MathUtils.maxElementIndex(AFresult.log10AlleleFrequencyPosteriors[i]); // if the most likely AC is not 0, then this is a good alternate allele to use - if ( indexOfBestAC != 0 ) { + if ( indexOfBestAC != 0 && (AFresult.log10AlleleFrequencyPosteriors[i][0] != AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED || AFresult.log10AlleleFrequencyPosteriors[i][indexOfBestAC] > AFresult.log10PosteriorOfAFzero) ) { altAllelesToUse[i] = true; bestGuessIsRef = false; } @@ -332,19 +331,14 @@ public class UnifiedGenotyperEngine { else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { altAllelesToUse[i] = true; } - - // keep track of the "best" alternate allele to use - if ( indexOfBestAC > alleleCountOfHighestAlt) { - alleleCountOfHighestAlt = indexOfBestAC; - indexOfHighestAlt = i; - } } - // calculate p(f>0) - // TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs - double[] normalizedPosteriors = MathUtils.normalizeFromLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[indexOfHighestAlt]); + // calculate p(f>0): + // because the likelihoods are marginalized for each alternate allele, we only need to compare log10PosteriorOfAFzero against any one of them + double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get()); + // TODO -- why not just 1.0 - normalizedPosteriors[0]? double sum = 0.0; - for (int i = 1; i <= N; i++) + for (int i = 1; i <= N+1; i++) // N+1 because the cell at position zero in the original posteriors array is actually probability of non-ref (since it's marginalized over all alleles) sum += normalizedPosteriors[i]; double PofF = Math.min(sum, 1.0); // deal with precision errors @@ -352,15 +346,17 @@ public class UnifiedGenotyperEngine { if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0]; + phredScaledConfidence = -10.0 * AFresult.log10PosteriorOfAFzero; } else { phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); if ( Double.isInfinite(phredScaledConfidence) ) { - sum = 0.0; + sum = AFresult.log10AlleleFrequencyPosteriors[0][0]; + if ( sum == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) + sum = 0.0; for (int i = 1; i <= N; i++) { - if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) + if ( AFresult.log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) break; - sum += alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i]; + sum += AFresult.log10AlleleFrequencyPosteriors[0][i]; } phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); } @@ -418,31 +414,31 @@ public class UnifiedGenotyperEngine { // the overall lod VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); - afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); - //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double overallLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1); + clearAFarray(AFresult.log10AlleleFrequencyLikelihoods); + clearAFarray(AFresult.log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult); + //double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0]; + double overallLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); // the forward lod VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); - afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); - //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); - double forwardLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0]; - double forwardLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1); + clearAFarray(AFresult.log10AlleleFrequencyLikelihoods); + clearAFarray(AFresult.log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult); + //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); + double forwardLog10PofNull = AFresult.log10PosteriorOfAFzero; + double forwardLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); - afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); - //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); - double reverseLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0]; - double reverseLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1); + clearAFarray(AFresult.log10AlleleFrequencyLikelihoods); + clearAFarray(AFresult.log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult); + //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); + double reverseLog10PofNull = AFresult.log10PosteriorOfAFzero; + double reverseLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; @@ -455,7 +451,8 @@ public class UnifiedGenotyperEngine { strandScore *= 10.0; //logger.debug(String.format("SLOD=%f", strandScore)); - attributes.put("SB", strandScore); + if ( !Double.isNaN(strandScore) ) + attributes.put("SB", strandScore); } // finish constructing the resulting VC @@ -478,6 +475,12 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } + private double[] generateNormalizedPosteriors(AlleleFrequencyCalculationResult AFresult, double[] normalizedPosteriors) { + normalizedPosteriors[0] = AFresult.log10PosteriorOfAFzero; + System.arraycopy(AFresult.log10AlleleFrequencyPosteriors[0], 0, normalizedPosteriors, 1, normalizedPosteriors.length-1); + return MathUtils.normalizeFromLog10(normalizedPosteriors); + } + private Map getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) { Map stratifiedContexts = null; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 6041f80b8..7fde73f0a 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -115,7 +115,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCallingParameters() { HashMap e = new HashMap(); e.put( "--min_base_quality_score 26", "7acb1a5aee5fdadb0cc0ea07a212efc6" ); - e.put( "--computeSLOD", "e9d23a08472e4e27b4f25e844f5bad57" ); + e.put( "--computeSLOD", "6172d2f3d370132f4c57a26aa94c256e" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -285,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("98a4d1e1e0a363ba37518563ac6cbead")); + Arrays.asList("8bd5028cf294850b8a95b3c0af23d728")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("915e7a3e7cbfd995dbc41fdd382d0d51")); + Arrays.asList("814dcd66950635a870602d90a1618cce")); executeTest("test MultiSample Phase1 indels with complicated records", spec4); } From c5ffe0ab0438a14d7e4ff90a2c443ba6dac4e989 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sun, 18 Dec 2011 00:31:47 -0500 Subject: [PATCH 24/36] No reason to sum the normalized posteriors array to get Pr(AF>0) given that we can just compute 1.0 - array[0]. Integration tests change only because of trivial precision artifacts for reference calls using EMIT_ALL_SITES. --- .../genotyper/UnifiedGenotyperEngine.java | 20 ++++++++----------- .../UnifiedGenotyperIntegrationTest.java | 4 ++-- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 533bf0c68..aa9be97a6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -335,12 +335,8 @@ public class UnifiedGenotyperEngine { // calculate p(f>0): // because the likelihoods are marginalized for each alternate allele, we only need to compare log10PosteriorOfAFzero against any one of them - double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get()); - // TODO -- why not just 1.0 - normalizedPosteriors[0]? - double sum = 0.0; - for (int i = 1; i <= N+1; i++) // N+1 because the cell at position zero in the original posteriors array is actually probability of non-ref (since it's marginalized over all alleles) - sum += normalizedPosteriors[i]; - double PofF = Math.min(sum, 1.0); // deal with precision errors + final double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get()); + final double PofF = 1.0 - normalizedPosteriors[0]; double phredScaledConfidence; if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { @@ -350,7 +346,7 @@ public class UnifiedGenotyperEngine { } else { phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); if ( Double.isInfinite(phredScaledConfidence) ) { - sum = AFresult.log10AlleleFrequencyPosteriors[0][0]; + double sum = AFresult.log10AlleleFrequencyPosteriors[0][0]; if ( sum == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) sum = 0.0; for (int i = 1; i <= N; i++) { @@ -370,7 +366,7 @@ public class UnifiedGenotyperEngine { } // strip out any alleles that aren't going to be used in the VariantContext - List myAlleles; + final List myAlleles; if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { myAlleles = new ArrayList(vc.getAlleles().size()); myAlleles.add(vc.getReference()); @@ -384,8 +380,8 @@ public class UnifiedGenotyperEngine { } // start constructing the resulting VC - GenomeLoc loc = genomeLocParser.createGenomeLoc(vc); - VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles); + final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc); + final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles); builder.log10PError(phredScaledConfidence/-10.0); if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter); @@ -396,14 +392,14 @@ public class UnifiedGenotyperEngine { } // create the genotypes - GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse, myAlleles); + final GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse, myAlleles); // print out stats if we have a writer if ( verboseWriter != null && !limitedContext ) printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model); // *** note that calculating strand bias involves overwriting data structures, so we do that last - HashMap attributes = new HashMap(); + final HashMap attributes = new HashMap(); // if the site was downsampled, record that fact if ( !limitedContext && rawContext.hasPileupBeenDownsampled() ) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 7fde73f0a..e049af064 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -129,8 +129,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testOutputParameter() { HashMap e = new HashMap(); e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" ); - e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "274bd9d1b9c7857690fa5f0228ffc6d7" ); - e.put( "--output_mode EMIT_ALL_SITES", "594c6d3c48bbc73289de7727d768644d" ); + e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "d971d392956aea69c3707da64d24485b" ); + e.put( "--output_mode EMIT_ALL_SITES", "21993e71ca1a06a0d1f11d58e3cc26c3" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( From 76bd13a1ed5fe25f028cae8add881492ac7dd484 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sun, 18 Dec 2011 01:13:49 -0500 Subject: [PATCH 26/36] Forgot to update the unit test --- .../walkers/genotyper/ExactAFCalculationModelUnitTest.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java index dec6ecb79..6f259f699 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java @@ -97,7 +97,12 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1)); int calculatedAlleleCount = MathUtils.maxElementIndex(result.log10AlleleFrequencyPosteriors[allele]); - Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); + + if ( result.log10AlleleFrequencyPosteriors[0][0] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) { + Assert.assertTrue(calculatedAlleleCount == expectedAlleleCount || result.log10AlleleFrequencyPosteriors[0][calculatedAlleleCount] < result.log10PosteriorOfAFzero); + } else { + Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); + } } } } From 953998dcd0d31280ea876cdcdcf46b0d66c6b16e Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Sun, 18 Dec 2011 14:38:59 -0500 Subject: [PATCH 27/36] Now that getSampleDB is public in the walker base class this override in VariantAnnotator isn't necessary. --- .../sting/gatk/walkers/annotator/VariantAnnotator.java | 5 ----- .../sting/gatk/walkers/variantutils/VariantsToTable.java | 6 +++--- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 94902e828..4fa6d0bbf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -194,11 +194,6 @@ public class VariantAnnotator extends RodWalker implements Ann System.exit(0); } - @Override - public SampleDB getSampleDB() { - return super.getSampleDB(); - } - /** * Prepare the output file and the list of available features. */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index f1f61d071..4b3aa4864 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -127,12 +127,12 @@ public class VariantsToTable extends RodWalker { * multi-allelic INFO field values can be lists of values. */ @Advanced - @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false) - public boolean keepMultiAllelic = false; + @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false) + public boolean keepMultiAllelic = false; @Hidden @Argument(fullName="logACSum", shortName="logACSum", doc="Log sum of AC instead of max value in case of multiallelic variants", required=false) - public boolean logACSum = false; + public boolean logACSum = false; /** * By default, this tool throws a UserException when it encounters a field without a value in some record. This From bc842ab3a5d60095aadff6bae4d271615d0ea294 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Sun, 18 Dec 2011 15:27:23 -0500 Subject: [PATCH 28/36] Adding option to VariantAnnotator to do strict allele matching when annotating with comp track concordance. --- .../gatk/walkers/annotator/VariantAnnotator.java | 4 ++++ .../walkers/annotator/VariantAnnotatorEngine.java | 14 ++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 4fa6d0bbf..69560c7cb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -170,6 +170,9 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio") public double minGenotypeQualityP = 0.0; + @Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp tracks that exactly match both reference and alternate alleles will be counted as concordant", required=false) + private boolean requireStrictAlleleMatch = false; + private VariantAnnotatorEngine engine; private Collection indelBufferContext; @@ -211,6 +214,7 @@ public class VariantAnnotator extends RodWalker implements Ann else engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit()); engine.initializeExpressions(expressionsToUse); + engine.setRequireStrictAlleleMatch(requireStrictAlleleMatch); // setup the header fields // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index d4442dc5d..98d2fe17b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -47,9 +47,11 @@ public class VariantAnnotatorEngine { private List requestedGenotypeAnnotations; private List requestedExpressions = new ArrayList(); - private HashMap, String> dbAnnotations = new HashMap, String>(); - private AnnotatorCompatibleWalker walker; - private GenomeAnalysisEngine toolkit; + private final HashMap, String> dbAnnotations = new HashMap, String>(); + private final AnnotatorCompatibleWalker walker; + private final GenomeAnalysisEngine toolkit; + + private boolean requireStrictAlleleMatch = false; protected static class VAExpression { @@ -163,6 +165,10 @@ public class VariantAnnotatorEngine { return descriptions; } + public void setRequireStrictAlleleMatch( final boolean requireStrictAlleleMatch ) { + this.requireStrictAlleleMatch = requireStrictAlleleMatch; + } + public VariantContext annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { Map infoAnnotations = new LinkedHashMap(vc.getAttributes()); @@ -197,7 +203,7 @@ public class VariantAnnotatorEngine { } else { boolean overlapsComp = false; for ( VariantContext comp : tracker.getValues(dbSet.getKey(), ref.getLocus()) ) { - if ( !comp.isFiltered() ) { + if ( !comp.isFiltered() && ( !requireStrictAlleleMatch || comp.getAlleles().equals(vc.getAlleles()) ) ) { overlapsComp = true; break; } From 1ead00cac5d027b45633b58bb8701f07f57c7166 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Sun, 18 Dec 2011 19:04:26 -0500 Subject: [PATCH 29/36] New fork of SamFileHeaderMerger should be cached at the thread level to enable fast (and valid) thread lookups. --- .../gatk/datasources/reads/SAMDataSource.java | 77 +++++++++++-------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 75a851d65..2e243b847 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -100,11 +100,6 @@ public class SAMDataSource { */ private final Map readerPositions = new HashMap(); - /** - * Cached representation of the merged header used to generate a merging iterator. - */ - private final SamFileHeaderMerger headerMerger; - /** * The merged header. */ @@ -300,9 +295,8 @@ public class SAMDataSource { initializeReaderPositions(readers); - headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true); - mergedHeader = headerMerger.getMergedHeader(); - hasReadGroupCollisions = headerMerger.hasReadGroupCollisions(); + mergedHeader = readers.getMergedHeader(); + hasReadGroupCollisions = readers.hasReadGroupCollisions(); readProperties = new ReadProperties( samFiles, @@ -327,9 +321,9 @@ public class SAMDataSource { List readGroups = reader.getFileHeader().getReadGroups(); for(SAMReadGroupRecord readGroup: readGroups) { - if(headerMerger.hasReadGroupCollisions()) { - mappingToMerged.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId())); - mergedToOriginalReadGroupMappings.put(headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()),readGroup.getReadGroupId()); + if(hasReadGroupCollisions) { + mappingToMerged.put(readGroup.getReadGroupId(),readers.getReadGroupId(id,readGroup.getReadGroupId())); + mergedToOriginalReadGroupMappings.put(readers.getReadGroupId(id,readGroup.getReadGroupId()),readGroup.getReadGroupId()); } else { mappingToMerged.put(readGroup.getReadGroupId(),readGroup.getReadGroupId()); mergedToOriginalReadGroupMappings.put(readGroup.getReadGroupId(),readGroup.getReadGroupId()); @@ -562,7 +556,7 @@ public class SAMDataSource { */ private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) { // Set up merging to dynamically merge together multiple BAMs. - MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true); + MergingSamRecordIterator mergingIterator = readers.createMergingIterator(); for(SAMReaderID id: getReaderIDs()) { CloseableIterator iterator = null; @@ -707,6 +701,11 @@ public class SAMDataSource { * A collection of readers derived from a reads metadata structure. */ private class SAMReaders implements Iterable { + /** + * Cached representation of the merged header used to generate a merging iterator. + */ + private final SamFileHeaderMerger headerMerger; + /** * Internal storage for a map of id -> reader. */ @@ -798,6 +797,11 @@ public class SAMDataSource { } if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime())); + + Collection headers = new LinkedList(); + for(SAMFileReader reader: readers.values()) + headers.add(reader.getFileHeader()); + headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true); } final private void printReaderPerformance(final int nExecutedTotal, @@ -814,7 +818,37 @@ public class SAMDataSource { nExecutedInTick, tickDurationInSec, nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond, nRemaining, estTimeToComplete, estTimeToComplete / 60)); + } + /** + * Return the header derived from the merging of these BAM files. + * @return the merged header. + */ + public SAMFileHeader getMergedHeader() { + return headerMerger.getMergedHeader(); + } + + /** + * Do multiple read groups collide in this dataset? + * @return True if multiple read groups collide; false otherwis. + */ + public boolean hasReadGroupCollisions() { + return headerMerger.hasReadGroupCollisions(); + } + + /** + * Get the newly mapped read group ID for the given read group. + * @param readerID Reader for which to discern the transformed ID. + * @param originalReadGroupID Original read group. + * @return Remapped read group. + */ + public String getReadGroupId(final SAMReaderID readerID, final String originalReadGroupID) { + SAMFileHeader header = readers.get(readerID).getFileHeader(); + return headerMerger.getReadGroupId(header,originalReadGroupID); + } + + public MergingSamRecordIterator createMergingIterator() { + return new MergingSamRecordIterator(headerMerger,readers.values(),true); } /** @@ -866,25 +900,6 @@ public class SAMDataSource { public boolean isEmpty() { return readers.isEmpty(); } - - /** - * Gets all the actual readers out of this data structure. - * @return A collection of the readers. - */ - public Collection values() { - return readers.values(); - } - - /** - * Gets all the actual readers out of this data structure. - * @return A collection of the readers. - */ - public Collection headers() { - ArrayList headers = new ArrayList(readers.size()); - for (SAMFileReader reader : values()) - headers.add(reader.getFileHeader()); - return headers; - } } class ReaderInitializer implements Callable { From 5b678e3b94a086c40a8cd7baf18dba713d07fe50 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sun, 18 Dec 2011 12:26:06 -0500 Subject: [PATCH 30/36] Remove ClippingOp UnitTests * all testing functionality is in the ReadClipperUnitTest, no need to double test. * class and package naming cleanup --- .../sting/gatk/walkers/ClipReadsWalker.java | 6 +- .../{clipreads => clipping}/ClippingOp.java | 2 +- .../ClippingRepresentation.java | 2 +- .../{clipreads => clipping}/ReadClipper.java | 2 +- .../ReadClipperTestUtils.java} | 8 ++- .../ReadClipperUnitTest.java | 24 +++---- .../utils/clipreads/ClippingOpUnitTest.java | 67 ------------------- 7 files changed, 23 insertions(+), 88 deletions(-) rename public/java/src/org/broadinstitute/sting/utils/{clipreads => clipping}/ClippingOp.java (99%) rename public/java/src/org/broadinstitute/sting/utils/{clipreads => clipping}/ClippingRepresentation.java (95%) rename public/java/src/org/broadinstitute/sting/utils/{clipreads => clipping}/ReadClipper.java (99%) rename public/java/test/org/broadinstitute/sting/utils/{clipreads/ClipReadsTestUtils.java => clipping/ReadClipperTestUtils.java} (98%) rename public/java/test/org/broadinstitute/sting/utils/{clipreads => clipping}/ReadClipperUnitTest.java (94%) delete mode 100644 public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index d1148cbd5..ff3867120 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -38,9 +38,9 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.clipreads.ClippingOp; -import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation; -import org.broadinstitute.sting.utils.clipreads.ReadClipper; +import org.broadinstitute.sting.utils.clipping.ClippingOp; +import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; +import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java rename to public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index 0bb8c3e6f..f440eda5d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import com.google.java.contract.Requires; import net.sf.samtools.Cigar; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingRepresentation.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java rename to public/java/src/org/broadinstitute/sting/utils/clipping/ClippingRepresentation.java index d574ba2f0..c9d8730d1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingRepresentation.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; /** * How should we represent a clipped bases in a read? diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java rename to public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index e6b3ce929..6e3f37980 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import com.google.java.contract.Requires; import net.sf.samtools.CigarElement; diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java similarity index 98% rename from public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java rename to public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java index 048d9d8e0..18108e0a1 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; @@ -8,7 +8,9 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; -import java.util.*; +import java.util.LinkedList; +import java.util.List; +import java.util.Stack; /** * Created by IntelliJ IDEA. @@ -17,7 +19,7 @@ import java.util.*; * Time: 6:45 AM * To change this template use File | Settings | File Templates. */ -public class ClipReadsTestUtils { +public class ReadClipperTestUtils { //Should contain all the utils needed for tests to mass produce //reads, cigars, and other needed classes diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java similarity index 94% rename from public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index fcde5f360..3b4b0abce 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; @@ -50,13 +50,13 @@ public class ReadClipperUnitTest extends BaseTest { @BeforeClass public void init() { - cigarList = ClipReadsTestUtils.generateCigarList(maximumCigarSize); + cigarList = ReadClipperTestUtils.generateCigarList(maximumCigarSize); } @Test(enabled = true) public void testHardClipBothEndsByReferenceCoordinates() { for (Cigar cigar : cigarList) { - GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); int readLength = alnStart - alnEnd; @@ -71,7 +71,7 @@ public class ReadClipperUnitTest extends BaseTest { @Test(enabled = true) public void testHardClipByReadCoordinates() { for (Cigar cigar : cigarList) { - GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int readLength = read.getReadLength(); for (int i=0; i %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java deleted file mode 100644 index dd674ff1c..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.broadinstitute.sting.utils.clipreads; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - - -/** - * Created by IntelliJ IDEA. - * User: roger - * Date: 11/27/11 - * Time: 5:17 AM - * To change this template use File | Settings | File Templates. - */ -public class ClippingOpUnitTest extends BaseTest { - - ClippingOp clippingOp; - GATKSAMRecord read; - - @BeforeTest - public void init() { - read = ClipReadsTestUtils.makeRead(); - } - - @Test - private void testHardClip() { -// List testList = new LinkedList(); -// testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start -// testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end -// testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start -// testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end -// testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start -// testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end -// -// for (TestParameter p : testList) { -// init(); -// clippingOp = new ClippingOp(p.inputStart, p.inputStop); -// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar); -// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read), -// ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), -// ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), -// p.cigar); -// } - - } - - @Test - private void testSoftClip() { -// List testList = new LinkedList(); -// testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start -// testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end -// testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start -// testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end -// testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start -// testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end -// -// for (TestParameter p : testList) { -// init(); -// clippingOp = new ClippingOp(p.inputStart, p.inputStop); -// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar); -// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read), -// ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar); -// } - - } -} From 3069a689fe7dcca14200a6ed1ba1fdff3a31dbf8 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 19 Dec 2011 10:04:33 -0500 Subject: [PATCH 32/36] Bug fix: if there are multiple records at a given position, it turns out that SelectVariants would drop all variants that follow after one that fails filters (instead of dropping just the failing one). Added an integration test to cover this case. --- .../walkers/variantutils/SelectVariants.java | 20 +++++++++++-------- .../SelectVariantsIntegrationTest.java | 13 ++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 0af351750..6d94ffe6d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -493,12 +493,12 @@ public class SelectVariants extends RodWalker implements TreeR if (DISCORDANCE_ONLY) { Collection compVCs = tracker.getValues(discordanceTrack, context.getLocation()); if (!isDiscordant(vc, compVCs)) - return 0; + continue; } if (CONCORDANCE_ONLY) { Collection compVCs = tracker.getValues(concordanceTrack, context.getLocation()); if (!isConcordant(vc, compVCs)) - return 0; + continue; } if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic()) @@ -512,16 +512,20 @@ public class SelectVariants extends RodWalker implements TreeR VariantContext sub = subsetRecord(vc, samples); if ( (sub.isPolymorphicInSamples() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) { + boolean failedJexlMatch = false; for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) { if ( !VariantContextUtils.match(sub, jexl) ) { - return 0; + failedJexlMatch = true; + break; } } - if (SELECT_RANDOM_NUMBER) { - randomlyAddVariant(++variantNumber, sub); - } - else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { - vcfWriter.add(sub); + if ( !failedJexlMatch ) { + if (SELECT_RANDOM_NUMBER) { + randomlyAddVariant(++variantNumber, sub); + } + else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { + vcfWriter.add(sub); + } } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 72a07bd0e..042de2a27 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -116,6 +116,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testUsingDbsnpName--" + testFile, spec); } + @Test + public void testMultipleRecordsAtOnePosition() { + String testFile = validationDataLocation + "selectVariants.onePosition.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s -NO_HEADER", + 1, + Arrays.asList("20b52c96f5c48258494d072752b53693") + ); + + executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec); + } + @Test public void testParallelization() { String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; From 5383c506540ca58827d5d798d3390780d5e57512 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 19 Dec 2011 10:08:38 -0500 Subject: [PATCH 33/36] Protect ourselves when iteration is present but there's only a single iteration in queueJobReport.R --- .../broadinstitute/sting/queue/util/queueJobReport.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R index aff783b8e..d5ee3626f 100644 --- a/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R +++ b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R @@ -12,7 +12,7 @@ if ( onCMDLine ) { inputFileName = args[1] outputPDF = args[2] } else { - inputFileName = "Q-8271@gsa2.jobreport.txt" + inputFileName = "Q-26618@gsa4.jobreport.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt" outputPDF = NA @@ -129,9 +129,11 @@ plotGroup <- function(groupTable) { # as above, but averaging over all iterations groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") if ( dim(sub)[1] > 1 ) { - sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) - textplot(as.data.frame(sum), show.rownames=F) - title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + try({ # need a try here because we will fail to reduce when there's just a single iteration + sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) + textplot(as.data.frame(sum), show.rownames=F) + title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + }, silent=T) } } @@ -193,6 +195,7 @@ plotJobsGantt(gatkReportData, F, F) plotProgressByTime(gatkReportData) plotTimeByHost(gatkReportData) for ( group in gatkReportData ) { + print(group) plotGroup(group) } From 5fd19ae734b8cc94ee276c8909b02b2e47c787a1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 19 Dec 2011 10:19:00 -0500 Subject: [PATCH 34/36] Commented exactly how the results are represented from the exact model so developers can know how to use them. --- .../genotyper/AlleleFrequencyCalculationResult.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java index cc72fde11..ed80dce0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java @@ -34,10 +34,17 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; */ public class AlleleFrequencyCalculationResult { - // note that the cell at position zero in the likelihoods/posteriors array is actually probability of non-ref (since it's marginalized over all alleles) + // IMPORTANT NOTE: + // These 2 arrays are intended to contain the likelihoods/posterior probabilities for each alternate allele over each possible frequency (from 0 to 2N). + // For any given alternate allele and frequency, the likelihoods are marginalized over values for all other alternate alleles. What this means is that + // the likelihoods at cell index zero (AF=0) in the array is actually that of the site's being polymorphic (because although this alternate allele may + // be at AF=0, it is marginalized over all other alternate alleles which are not necessarily at AF=0). + // In the bi-allelic case (where there are no other alternate alleles over which to marginalize), + // the value at cell index zero will be equal to AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED. final double[][] log10AlleleFrequencyLikelihoods; final double[][] log10AlleleFrequencyPosteriors; + // These 2 variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles) double log10LikelihoodOfAFzero = 0.0; double log10PosteriorOfAFzero = 0.0; From 16cc2b864e3098e96eaff5727a5583775e392442 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Mon, 19 Dec 2011 13:41:47 +0100 Subject: [PATCH 35/36] - Corrected bug causing cases where both parents are HET to be accounted twice in the TDT calculation - Adapted TDT Integration test to corrected version of TDT Signed-off-by: Ryan Poplin --- .../annotator/TransmissionDisequilibriumTest.java | 15 +++++++-------- .../VariantAnnotatorIntegrationTest.java | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index 6cc8923e8..ecdde1e4f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -63,27 +63,26 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT private double calculateTDT( final VariantContext vc, final Set triosToTest ) { - final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM); - final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM); + final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM) + calculateNChildren(vc, triosToTest, HET, HOM, HET); + final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM) + calculateNChildren(vc, triosToTest, HOM, HOM, HET); final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET); final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET); - final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET); - final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET); + final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET) + calculateNChildren(vc, triosToTest, REF, HET, REF); + final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET) + calculateNChildren(vc, triosToTest, HET, HET, REF); final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); return (numer * numer) / denom; } - private double calculateNChildren( final VariantContext vc, final Set triosToTest, final int childIdx, final int momIdx, final int dadIdx ) { - final double likelihoodVector[] = new double[triosToTest.size() * 2]; + private double calculateNChildren( final VariantContext vc, final Set triosToTest, final int childIdx, final int parent1Idx, final int parent2Idx ) { + final double likelihoodVector[] = new double[triosToTest.size()]; int iii = 0; for( final Sample child : triosToTest ) { final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector(); final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector(); final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector(); - likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; - likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx]; + likelihoodVector[iii++] = momGL[parent1Idx] + dadGL[parent2Idx] + childGL[childIdx]; } return MathUtils.sumLog10(likelihoodVector); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index ffb9aedcc..245fda3c7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testTDTAnnotation() { - final String MD5 = "9fe37b61aab695ad47ce3c587148e91f"; + final String MD5 = "204e67536a17af7eaa6bf0a910818997"; WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" + " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,