From ab1c48745beca74be9382e4b2dc3793488278d41 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 21 Mar 2012 12:13:52 -0400 Subject: [PATCH 01/29] Need to catch RuntimeExceptions coming out of Picard too so that they show up as UserErrors (some BAM errors are thrown as REs). --- .../gatk/iterators/MalformedBAMErrorReformatingIterator.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java index f5dee4961..ca155a27f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java @@ -6,6 +6,7 @@ import net.sf.samtools.util.CloseableIterator; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; +import java.io.IOException; import java.util.Iterator; /** @@ -23,7 +24,7 @@ public class MalformedBAMErrorReformatingIterator implements CloseableIterator Date: Wed, 21 Mar 2012 12:14:31 -0400 Subject: [PATCH 02/29] Minor cleanup --- .../iterators/MalformedBAMErrorReformatingIterator.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java index ca155a27f..18bf16d71 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java @@ -1,12 +1,10 @@ package org.broadinstitute.sting.gatk.iterators; -import net.sf.samtools.SAMFormatException; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; -import java.io.IOException; import java.util.Iterator; /** @@ -24,7 +22,7 @@ public class MalformedBAMErrorReformatingIterator implements CloseableIterator Date: Wed, 21 Mar 2012 12:42:13 -0400 Subject: [PATCH 03/29] Bug fix: merge NO_VARIATION records with those of another type. The sad part is that this WAS covered by integration tests but someone updated the MD5s without actually paying attention... --- .../gatk/walkers/variantutils/CombineVariants.java | 13 +++++++++++++ .../CombineVariantsIntegrationTest.java | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 684b9102a..3066b0bc6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -243,6 +243,19 @@ public class CombineVariants extends RodWalker { if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) { Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); + + // TODO -- clean this up in a refactoring + // merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type) + if ( VCsByType.containsKey(VariantContext.Type.NO_VARIATION) && VCsByType.size() > 1 ) { + final List refs = VCsByType.remove(VariantContext.Type.NO_VARIATION); + for ( VariantContext.Type type : VariantContext.Type.values() ) { + if ( VCsByType.containsKey(type) ) { + VCsByType.get(type).addAll(refs); + break; + } + } + } + // iterate over the types so that it's deterministic for (VariantContext.Type type : VariantContext.Type.values()) { if (VCsByType.containsKey(type)) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index d74aac79d..5282c9e58 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("ab72f4bfb16d3894942149173a087647")); + Arrays.asList("ee43a558fd3faeaa447acab89f0001d5")); executeTest("threeWayWithRefs", spec); } From 58245bfa2f753ce0cc4c58f95b59f6e0a6963fe3 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 21 Mar 2012 12:44:09 -0400 Subject: [PATCH 04/29] Bug fix: check to see whether there's a BasePileup before asking for one. --- .../broadinstitute/sting/gatk/walkers/annotator/LowMQ.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 753740258..4a0c7a6da 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -31,8 +31,10 @@ public class LowMQ extends InfoFieldAnnotation { double total = 0; for ( Map.Entry sample : stratifiedContexts.entrySet() ) { - ReadBackedPileup pileup = sample.getValue().getBasePileup(); - for (PileupElement p : pileup ) + if ( !sample.getValue().hasBasePileup() ) + continue; + + for ( PileupElement p : sample.getValue().getBasePileup() ) { if ( p.getMappingQual() == 0 ) { mq0 += 1; } if ( p.getMappingQual() <= 10 ) { mq10 += 1; } From b8cd959461ab7e55c07139f9dcaebc0851406ffd Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 22 Mar 2012 10:06:22 -0400 Subject: [PATCH 05/29] Potential corner condition bug fix: protect against null pointer exceptions when computing consensus indel bases when UG is discovering alt alleles. If an alt allele has non-standard bases, skip allele gracefully instead of adding null object into list --- .../genotyper/IndelGenotypeLikelihoodsCalculationModel.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 1b73ef1d7..21f11d2ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -240,6 +240,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood refAllele = Allele.create(refBases, true); altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); } + else continue; // don't go on with this allele if refBases are non-standard } else { // insertion case if (Allele.acceptableAlleleBases(s)) { @@ -247,6 +248,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood altAllele = Allele.create(s, false); stop = loc.getStart(); } + else continue; // go on to next allele if consensus insertion has any non-standard base. } From 1dfaacfeb5f1e7b31b10be94605f15622d2a4cbc Mon Sep 17 00:00:00 2001 From: Menachem Fromer Date: Thu, 22 Mar 2012 12:40:15 -0400 Subject: [PATCH 07/29] Check for consistency of the BAM and VCF sample names, with a command line disable to throw if you know what you are doing --- .../phasing/ReadBackedPhasingWalker.java | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 9470ce2f4..734ade376 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -36,8 +36,10 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; @@ -121,8 +123,11 @@ public class ReadBackedPhasingWalker extends RodWalker samplesToPhase = null; + protected Set samplesToPhase = null; + + @Hidden + @Argument(fullName = "permitNoSampleOverlap", shortName = "permitNoSampleOverlap", doc = "Don't exit (just WARN) when the VCF and BAMs do not overlap in samples", required = false) + private boolean permitNoSampleOverlap = false; private GenomeLoc mostDownstreamLocusReached = null; @@ -205,8 +210,18 @@ public class ReadBackedPhasingWalker extends RodWalker rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); - Set samples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase); - writer.writeHeader(new VCFHeader(hInfo, samples)); + Set vcfSamples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase); + writer.writeHeader(new VCFHeader(hInfo, vcfSamples)); + + Set readSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + readSamples.retainAll(vcfSamples); + if (readSamples.isEmpty()) { + String noPhaseString = "No common samples in VCF and BAM headers" + (samplesToPhase == null ? "" : " (limited to sampleToPhase parameters)") + ", so nothing could possibly be phased!"; + if (permitNoSampleOverlap) + logger.warn(noPhaseString); + else + throw new UserException(noPhaseString); + } } public boolean generateExtendedEvents() { From b9b9219ac7db434885ff118330ac93e244398f49 Mon Sep 17 00:00:00 2001 From: Menachem Fromer Date: Thu, 22 Mar 2012 17:40:21 -0400 Subject: [PATCH 08/29] Added respectPhaseInInput flag to RBP and integration tests --- .../phasing/ReadBackedPhasingWalker.java | 22 +++++++++++++++++-- .../ReadBackedPhasingIntegrationTest.java | 20 +++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 734ade376..dc5dfc907 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -129,6 +129,9 @@ public class ReadBackedPhasingWalker extends RodWalker unphasedSiteQueue = null; @@ -487,6 +490,13 @@ public class ReadBackedPhasingWalker extends RodWalker readsAtHetSites = null; + private void clearFields() { + hetGenotypes = null; + prevHetAndInteriorIt = null; + phasingSiteIndex = -1; + readsAtHetSites = null; + } + public boolean hasPreviousHets() { return phasingSiteIndex > 0; } @@ -513,12 +523,20 @@ public class ReadBackedPhasingWalker extends RodWalker= 10; cacheWindow = 20000; has inconsistent sites", spec); } + @Test + public void test7() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "CEU.trio.2010_03.genotypes.hg18.vcf", 20000, 10, 10) + + " -L chr20:332341-802503", + 1, + Arrays.asList("c37548b333b65f58d0edfc5c2a62a28a")); + executeTest("Use trio-phased VCF, but ignore its phasing [TEST SEVEN]", spec); + } + + @Test + public void test8() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "CEU.trio.2010_03.genotypes.hg18.vcf", 20000, 10, 10) + + " -L chr20:332341-802503" + " -respectPhaseInInput", + 1, + Arrays.asList("dfc7cdddd702e63d46d04f61a3ecd720")); + executeTest("Use trio-phased VCF, and respect its phasing [TEST EIGHT]", spec); + } + } From 6c2290fb6e54b01278ac294da8028ef14758ecc8 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Mar 2012 18:46:37 -0400 Subject: [PATCH 09/29] Performance optimization for gsa.read.gatkreport.R -- instead of using y = rbind(x, y), which is O(n^2) in a loop when processing lines into a data structure in R, preallocate a matrix and explicitly assign each row to x. This results in a radical performance improvement when reading large tables into R. It's possible with this optimization to read in a 70MB table for variantQCReport.R with 200K lines for 800 samples. --- .../utils/R/gsalib/R/gsa.read.gatkreport.R | 124 ++++++++++-------- 1 file changed, 66 insertions(+), 58 deletions(-) diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R index 64fbcc50a..4c228ccb4 100644 --- a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R +++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R @@ -99,69 +99,77 @@ gsa.read.gatkreportv0 <- function(lines) { # Load all GATKReport v1 tables from file gsa.read.gatkreportv1 <- function(lines) { + #print("loading with optimized v1 reader") + nLines = length(lines) + tableEnv = new.env(); + + tableName = NA; + tableHeader = c(); + tableRows = NULL; + version = ""; + rowCount = 0 + headerRowCount = -1; + + finishTable <- function() { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows[1:rowCount,], tableEnv); + } + + for (line in lines) { - tableEnv = new.env(); - - tableName = NA; - tableHeader = c(); - tableRows = c(); - version = ""; - headerRowCount = -1; - - for (line in lines) { - - if (length(grep("^#:GATKReport.v1", line, ignore.case=TRUE)) > 0) { - version = "v1.0"; - headerRowCount = 0; - } - - if ( (headerRowCount %% 2 == 1) && (version == "v1.0") ) { - #print("Trying to start a table with line:"); - #print(line); - - #Get table header - headerFields = unlist(strsplit(line, ":")); - - if (!is.na(tableName)) { - .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); - } - - tableName = headerFields[3]; - tableHeader = c(); - tableRows = c(); - - columnStarts = c(); - - } - - if (length(grep("^#:GATKTable", line, ignore.case=TRUE)) > 0) { - headerRowCount = headerRowCount+1; - #print("Header Row count is at:") - #print(headerRowCount); - } else if (!is.na(tableName)) { - if ( version == "v1.0") { - if (length(tableHeader) == 0) { - headerChars = unlist(strsplit(line, "")); - # Find the first position of non space characters, excluding the first character - columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1); - } - - row = .gsa.splitFixedWidth(line, columnStarts); - } - - if (length(tableHeader) == 0) { - tableHeader = row; - } else if ( nchar(line) > 0 ) { - tableRows = rbind(tableRows, row); - } - } + if (length(grep("^#:GATKReport.v1", line, ignore.case=TRUE)) > 0) { + version = "v1.0"; + headerRowCount = 0; } - if (!is.na(tableName)) { - .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + if ( (headerRowCount %% 2 == 1) && (version == "v1.0") ) { + #print("Trying to start a table with line:"); + #print(line); + + #Get table header + headerFields = unlist(strsplit(line, ":")); + + if (!is.na(tableName)) { + finishTable() + } + + tableName = headerFields[3]; + tableHeader = c(); + tableRows = NULL + rowCount = 0 + + columnStarts = c(); } - gatkreport = as.list(tableEnv, all.names=TRUE); + if (length(grep("^#:GATKTable", line, ignore.case=TRUE)) > 0) { + headerRowCount = headerRowCount+1; + #print("Header Row count is at:") + #print(headerRowCount); + } else if (!is.na(tableName)) { + if ( version == "v1.0") { + if (length(tableHeader) == 0) { + headerChars = unlist(strsplit(line, "")); + # Find the first position of non space characters, excluding the first character + columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1); + tableRows = matrix(nrow=nLines, ncol=length(columnStarts)+1); + } + + row = .gsa.splitFixedWidth(line, columnStarts); + } + + if (length(tableHeader) == 0) { + tableHeader = row; + } else if ( nchar(line) > 0 ) { + rowCount = rowCount + 1 + tableRows[rowCount,] <- row + } + } + } + + if (!is.na(tableName)) { + finishTable() + } + + gatkreport = as.list(tableEnv, all.names=TRUE); } # Load all GATKReport tables from a file From bd5b6d1aba699fa399f69205da04f0985add9bc3 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Mar 2012 19:38:03 -0400 Subject: [PATCH 10/29] Remove no longer in use Eval modules from VariantEval -- No more IndelLengthHistogram (superceded by IndelSummary in subsequent commit) -- No more SamplePreviousGenotypes or PhaseStats -- No more MultiallelicAFs --- .../evaluators/IndelLengthHistogram.java | 111 ------------- .../evaluators/MultiallelicAFs.java | 154 ------------------ .../varianteval/evaluators/PhaseStats.java | 54 ------ .../evaluators/SamplePreviousGenotypes.java | 30 ---- 4 files changed, 349 deletions(-) delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicAFs.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PhaseStats.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java deleted file mode 100755 index 6cf8b7c2c..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java +++ /dev/null @@ -1,111 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -/** - * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl - * - * @Author chartl - * @Date May 26, 2010 - */ -@Analysis(name = "Indel length histograms", description = "Shows the distribution of insertion/deletion event lengths (negative for deletion, positive for insertion)") -public class IndelLengthHistogram extends VariantEvaluator { - private static final int SIZE_LIMIT = 100; - @DataPoint(description="Histogram of indel lengths") - IndelHistogram indelHistogram = new IndelHistogram(SIZE_LIMIT); - - /* - * Indel length histogram table object - */ - - static class IndelHistogram implements TableType { - private Integer[] colKeys; - private int limit; - private String[] rowKeys = {"EventLength"}; - private Integer[] indelHistogram; - - public IndelHistogram(int limit) { - colKeys = initColKeys(limit); - indelHistogram = initHistogram(limit); - this.limit = limit; - } - - public Object[] getColumnKeys() { - return colKeys; - } - - public Object[] getRowKeys() { - return rowKeys; - } - - public Object getCell(int row, int col) { - return indelHistogram[col]; - } - - private Integer[] initColKeys(int size) { - Integer[] cK = new Integer[size*2+1]; - int index = 0; - for ( int i = -size; i <= size; i ++ ) { - cK[index] = i; - index++; - } - - return cK; - } - - private Integer[] initHistogram(int size) { - Integer[] hist = new Integer[size*2+1]; - for ( int i = 0; i < 2*size+1; i ++ ) { - hist[i] = 0; - } - - return hist; - } - - public String getName() { return "indelHistTable"; } - - public void update(int eLength) { - indelHistogram[len2index(eLength)]++; - } - - private int len2index(int len) { - if ( len > limit || len < -limit ) { - throw new ReviewedStingException("Indel length exceeds limit of "+limit+" please increase indel limit size"); - } - return len + limit; - } - } - - public boolean enabled() { return true; } - - public String getName() { return "IndelLengthHistogram"; } - - public int getComparisonOrder() { return 1; } // need only the evals - - public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - - if ( vc1.isIndel() && vc1.isPolymorphicInSamples() ) { - - if ( ! vc1.isBiallelic() ) { - //veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored."); - return vc1.toString(); // biallelic sites are output - } - - // only count simple insertions/deletions, not complex indels - if ( vc1.isSimpleInsertion() ) { - indelHistogram.update(vc1.getAlternateAllele(0).length()); - } else if ( vc1.isSimpleDeletion() ) { - indelHistogram.update(-vc1.getReference().length()); - } - } - - return null; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicAFs.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicAFs.java deleted file mode 100644 index 7ed179c32..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicAFs.java +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; - -import java.util.*; - -@Analysis(description = "Evaluation summary for multi-allelic variants") -public class MultiallelicAFs extends VariantEvaluator { - final protected static Logger logger = Logger.getLogger(MultiallelicAFs.class); - - public enum Type { - SNP, INDEL - } - - @DataPoint(description="Histogram of allele frequencies for most common SNP alternate allele") - AFHistogram AFhistogramMaxSnp = new AFHistogram(); - - @DataPoint(description="Histogram of allele frequencies for less common SNP alternate alleles") - AFHistogram AFhistogramMinSnp = new AFHistogram(); - - @DataPoint(description="Histogram of allele frequencies for most common Indel alternate allele") - AFHistogram AFhistogramMaxIndel = new AFHistogram(); - - @DataPoint(description="Histogram of allele frequencies for less common Indel alternate alleles") - AFHistogram AFhistogramMinIndel = new AFHistogram(); - - /* - * AF histogram table object - */ - static class AFHistogram implements TableType { - private Object[] rowKeys, colKeys = {"count"}; - private int[] AFhistogram; - - private static final double AFincrement = 0.01; - private static final int numBins = (int)(1.00 / AFincrement); - - public AFHistogram() { - rowKeys = initRowKeys(); - AFhistogram = new int[rowKeys.length]; - } - - public Object[] getColumnKeys() { - return colKeys; - } - - public Object[] getRowKeys() { - return rowKeys; - } - - public Object getCell(int row, int col) { - return AFhistogram[row]; - } - - private static Object[] initRowKeys() { - ArrayList keyList = new ArrayList(numBins + 1); - for ( double a = 0.00; a <= 1.01; a += AFincrement ) { - keyList.add(String.format("%.2f", a)); - } - return keyList.toArray(); - } - - public String getName() { return "AFHistTable"; } - - public void update(final double AF) { - final int bin = (int)(numBins * MathUtils.round(AF, 2)); - AFhistogram[bin]++; - } - } - - public void initialize(VariantEvalWalker walker) {} - - @Override public boolean enabled() { return true; } - - public int getComparisonOrder() { - return 2; - } - - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {} - - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( eval == null || eval.isMonomorphicInSamples() ) - return null; - - if ( !eval.isBiallelic() ) - return null; - - // update counts - switch ( eval.getType() ) { - case SNP: - updateAFhistogram(eval, AFhistogramMaxSnp, AFhistogramMinSnp); - break; - case INDEL: - updateAFhistogram(eval, AFhistogramMaxIndel, AFhistogramMinIndel); - break; - default: - throw new UserException.BadInput("Unexpected variant context type: " + eval); - } - - return null; // we don't capture any interesting sites - } - - private void updateAFhistogram(VariantContext vc, AFHistogram max, AFHistogram min) { - - final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null); - if ( obj == null || !(obj instanceof List) ) - return; - - List list = (List)obj; - ArrayList AFs = new ArrayList(list.size()); - for ( String str : list ) { - AFs.add(Double.valueOf(str)); - } - - Collections.sort(AFs); - max.update(AFs.get(AFs.size()-1)); - for ( int i = 0; i < AFs.size() - 1; i++ ) - min.update(AFs.get(i)); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PhaseStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PhaseStats.java deleted file mode 100755 index ab1f410f9..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PhaseStats.java +++ /dev/null @@ -1,54 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -/** - * Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings - * | File Templates. - */ -class NewPhaseStats { - public int neitherPhased; - public int onlyCompPhased; - public int onlyEvalPhased; - public int phasesAgree; - public int phasesDisagree; - - public NewPhaseStats() { - this.neitherPhased = 0; - this.onlyCompPhased = 0; - this.onlyEvalPhased = 0; - this.phasesAgree = 0; - this.phasesDisagree = 0; - } - - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("Neither phased: " + neitherPhased + "\tOnly Comp: " + onlyCompPhased + "\tOnly Eval: " + onlyEvalPhased + "\tSame phase: " + phasesAgree + "\tOpposite phase: " + phasesDisagree); - return sb.toString(); - } - - public static String[] getFieldNamesArray() { - return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"}; - } - - public Object getField(int index) { - switch (index) { - case (0): - return (neitherPhased + onlyCompPhased + onlyEvalPhased + phasesAgree + phasesDisagree); - case (1): - return neitherPhased; - case (2): - return onlyCompPhased; - case (3): - return onlyEvalPhased; - case (4): - return (phasesAgree + phasesDisagree); - case (5): - return phasesAgree; - case (6): - return phasesDisagree; - case (7): - return ((phasesDisagree == 0) ? 0 : ((double) phasesDisagree) / (phasesAgree + phasesDisagree)); - default: - return -1; - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java deleted file mode 100755 index 751f61a97..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.variantcontext.Genotype; - -import java.util.HashMap; - -/** - * Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings - * | File Templates. - */ -class NewSamplePreviousGenotypes { - private HashMap sampleGenotypes = null; - - public NewSamplePreviousGenotypes() { - this.sampleGenotypes = new HashMap(); - } - - public CompEvalGenotypes get(String sample) { - return sampleGenotypes.get(sample); - } - - public void put(String sample, CompEvalGenotypes compEvalGts) { - sampleGenotypes.put(sample, compEvalGts); - } - - public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) { - sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt)); - } -} From 9ddd5aec93605243c3d963fb10699465d40577ca Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Mar 2012 19:40:19 -0400 Subject: [PATCH 11/29] More eval modules being removed from VariantEval -- IndelStatistics is superceded by IndelStatistics --- .../evaluators/IndelStatistics.java | 295 ------------------ 1 file changed, 295 deletions(-) delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java deleted file mode 100755 index 87b453ae3..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java +++ /dev/null @@ -1,295 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.IndelUtils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.ArrayList; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -@Analysis(name = "IndelStatistics", description = "Shows various indel metrics and statistics") -public class IndelStatistics extends VariantEvaluator { - @DataPoint(description = "Indel Statistics") - IndelStats indelStats = null; - - // @DataPoint(description = "Indel Classification") - IndelClasses indelClasses = null; - - int numSamples = 0; - - public void initialize(VariantEvalWalker walker) { - numSamples = walker.getNumSamples(); - } - - private static final int INDEL_SIZE_LIMIT = 100; - private static final int IND_HET = 0; - private static final int IND_INS = 1; - private static final int IND_DEL = 2; - private static final int IND_COMPLEX = 3; - private static final int IND_HET_INS = 4; - private static final int IND_HOM_INS = 5; - private static final int IND_HET_DEL = 6; - private static final int IND_HOM_DEL = 7; - private static final int IND_HOM_REF = 8; - private static final int IND_MIXED = 9; - private static final int IND_LONG = 10; - private static final int IND_AT_EXP = 11; - private static final int IND_CG_EXP = 12; - private static final int IND_FRAMESHIFT = 13; - private static final int NUM_SCALAR_COLUMNS = 14; - - static int len2Index(int ind) { - return ind+INDEL_SIZE_LIMIT+NUM_SCALAR_COLUMNS; - } - - static int index2len(int ind) { - return ind-INDEL_SIZE_LIMIT-NUM_SCALAR_COLUMNS; - } - - static class IndelStats implements TableType { - protected final static String[] COLUMN_KEYS; - - static { - COLUMN_KEYS= new String[NUM_SCALAR_COLUMNS+2*INDEL_SIZE_LIMIT+1]; - COLUMN_KEYS[0] = "heterozygosity"; - COLUMN_KEYS[1] = "insertions"; - COLUMN_KEYS[2] = "deletions"; - COLUMN_KEYS[3] = "complex"; - COLUMN_KEYS[4] = "het_insertions"; - COLUMN_KEYS[5] = "homozygous_insertions"; - COLUMN_KEYS[6] = "het_deletions"; - COLUMN_KEYS[7] = "homozygous_deletions"; - COLUMN_KEYS[8] = "homozygous_reference_sites"; - COLUMN_KEYS[9] = "complex_events"; - COLUMN_KEYS[10] = "long_indels"; - COLUMN_KEYS[11] = "AT_expansions"; - COLUMN_KEYS[12] = "CG_expansions"; - COLUMN_KEYS[13] = "frameshift_indels"; - - for (int k=NUM_SCALAR_COLUMNS; k < NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT+1; k++) - COLUMN_KEYS[k] = "indel_size_len"+Integer.valueOf(index2len(k)); - } - - // map of sample to statistics - protected final int[] indelSummary; - - public IndelStats(final VariantContext vc) { - indelSummary = new int[COLUMN_KEYS.length]; - } - - /** - * - * @return one row per sample - */ - public Object[] getRowKeys() { - return new String[]{"all"}; - } - public Object getCell(int x, int y) { - return String.format("%d",indelSummary[y]); - } - - /** - * get the column keys - * @return a list of objects, in this case strings, that are the column names - */ - public Object[] getColumnKeys() { - return COLUMN_KEYS; - } - - public String getName() { - return "IndelStats"; - } - - public int getComparisonOrder() { - return 1; // we only need to see each eval track - } - - public String toString() { - return getName(); - } - - /* - * increment the specified value - */ - public void incrValue(VariantContext vc, ReferenceContext ref) { - int eventLength = 0; - boolean isInsertion = false, isDeletion = false; - - if ( vc.isSimpleInsertion() ) { - eventLength = vc.getAlternateAllele(0).length(); - indelSummary[IND_INS]++; - isInsertion = true; - } else if ( vc.isSimpleDeletion() ) { - indelSummary[IND_DEL]++; - eventLength = -vc.getReference().length(); - isDeletion = true; - } - else if (vc.isComplexIndel()) { - indelSummary[IND_COMPLEX]++; - } - else if (vc.isMixed()) - indelSummary[IND_MIXED]++; - - if (IndelUtils.isATExpansion(vc,ref)) - indelSummary[IND_AT_EXP]++; - if (IndelUtils.isCGExpansion(vc,ref)) - indelSummary[IND_CG_EXP]++; - - // make sure event doesn't overstep array boundaries - if (vc.isSimpleDeletion() || vc.isSimpleInsertion()) { - if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) { - indelSummary[len2Index(eventLength)]++; - if (eventLength % 3 != 0) - indelSummary[IND_FRAMESHIFT]++; - } - else - indelSummary[IND_LONG]++; - } - - } - } - - static class IndelClasses implements TableType { - protected final static String[] columnNames = IndelUtils.getIndelClassificationNames(); - - - // map of sample to statistics - protected final int[] indelClassSummary; - - public IndelClasses(final VariantContext vc) { - indelClassSummary = new int[columnNames.length]; - } - - /** - * - * @return one row per sample - */ - public Object[] getRowKeys() { - return new String[]{"all"}; - } - public Object getCell(int x, int y) { - return String.format("%d",indelClassSummary[y]); - } - - /** - * get the column keys - * @return a list of objects, in this case strings, that are the column names - */ - public Object[] getColumnKeys() { - return columnNames; - } - - public String getName() { - return "IndelClasses"; - } - - public int getComparisonOrder() { - return 1; // we only need to see each eval track - } - - public String toString() { - return getName(); - } - - private void incrementSampleStat(VariantContext vc, int index) { - indelClassSummary[index]++; - } - /* - * increment the specified value - */ - public void incrValue(VariantContext vc, ReferenceContext ref) { - - - ArrayList indices = IndelUtils.findEventClassificationIndex(vc,ref); - //System.out.format("pos:%d \nREF: %s, ALT: %s\n",vc.getStart(), vc.getReference().getDisplayString(), - // vc.getAlternateAllele(0).getDisplayString()); - - byte[] refBases = ref.getBases(); - //System.out.format("ref bef:%s\n",new String(Arrays.copyOfRange(refBases,0,refBases.length/2+1) )); - //System.out.format("ref aft:%s\n",new String(Arrays.copyOfRange(refBases,refBases.length/2+1,refBases.length) )); - for (int index: indices) { - incrementSampleStat(vc, index); - // System.out.println(IndelUtils.getIndelClassificationName(index)); - } - } - - } - - //public IndelStatistics(VariantEvalWalker parent) { - //super(parent); - // don't do anything - //} - - public String getName() { - return "IndelStatistics"; - } - - public int getComparisonOrder() { - return 1; // we only need to see each eval track - } - - public boolean enabled() { - return true; - } - - public String toString() { - return getName(); - } - - public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - - if (eval != null && eval.isPolymorphicInSamples()) { - if ( indelStats == null ) { - indelStats = new IndelStats(eval); - } - if ( indelClasses == null ) { - indelClasses = new IndelClasses(eval); - } - - if ( eval.isIndel() || eval.isMixed() ) { - if (indelStats != null ) - indelStats.incrValue(eval, ref); - - if (indelClasses != null) - indelClasses.incrValue(eval, ref); - } - } - - return null; // This module doesn't capture any interesting sites, so return null - } - - public void finalizeEvaluation() { - int k=0; - } - -} From e4d49357ce598fb865de346d31ae73cd60435fbb Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Mar 2012 21:08:08 -0400 Subject: [PATCH 14/29] Further cleanup of R --- public/R/titvFPEst.R | 138 ------------------------------------------- 1 file changed, 138 deletions(-) delete mode 100755 public/R/titvFPEst.R diff --git a/public/R/titvFPEst.R b/public/R/titvFPEst.R deleted file mode 100755 index 7af5e8bbb..000000000 --- a/public/R/titvFPEst.R +++ /dev/null @@ -1,138 +0,0 @@ -titvFPEst <- function(titvExpected, titvObserved) { max(min(1 - (titvObserved - 0.5) / (titvExpected - 0.5), 1), 0.001) } - -titvFPEstV <- function(titvExpected, titvs) { - sapply(titvs, function(x) titvFPEst(titvExpected, x)) -} - -calcHet <- function(nknown, knownTiTv, nnovel, novelTiTv, callable) { - TP <- nknown + (1-titvFPEst(knownTiTv, novelTiTv)) * nnovel - 2 * TP / 3 / callable -} - -marginalTiTv <- function( nx, titvx, ny, titvy ) { - tvx = nx / (titvx + 1) - tix = nx - tvx - tvy = ny / (titvy + 1) - tiy = ny - tvy - tiz = tix - tiy - tvz = tvx - tvy - return(tiz / tvz) -} -marginaldbSNPRate <- function( nx, dbx, ny, dby ) { - knownx = nx * dbx / 100 - novelx = nx - knownx - knowny = ny * dby / 100 - novely = ny - knowny - knownz = knownx - knowny - novelz = novelx - novely - return(knownz / ( knownz + novelz ) * 100) -} - -numExpectedCalls <- function(L, theta, calledFractionOfRegion, nIndividuals, dbSNPRate) { - nCalls <- L * theta * calledFractionOfRegion * sum(1 / seq(1, 2 * nIndividuals)) - return(list(nCalls = nCalls, nKnown = dbSNPRate * nCalls, nNovel = (1-dbSNPRate) * nCalls)) -} - -normalize <- function(x) { - x / sum(x) -} - -normcumsum <- function(x) { - cumsum(normalize(x)) -} - -cumhist <- function(d, ...) { - plot(d[order(d)], type="b", col="orange", lwd=2, ...) -} - -revcumsum <- function(x) { - return(rev(cumsum(rev(x)))) -} - -phred <- function(x) { - log10(max(x,10^(-9.9)))*-10 -} - -pOfB <- function(b, B, Q) { - #print(paste(b, B, Q)) - p = 1 - 10^(-Q/10) - if ( b == B ) - return(p) - else - return(1 - p) -} - -pOfG <- function(bs, qs, G) { - a1 = G[1] - a2 = G[2] - - log10p = 0 - for ( i in 1:length(bs) ) { - b = bs[i] - q = qs[i] - p1 = pOfB(b, a1, q) / 2 + pOfB(b, a2, q) / 2 - log10p = log10p + log10(p1) - } - - return(log10p) -} - -pOfGs <- function(nAs, nBs, Q) { - bs = c(rep("a", nAs), rep("t", nBs)) - qs = rep(Q, nAs + nBs) - G1 = c("a", "a") - G2 = c("a", "t") - G3 = c("t", "t") - - log10p1 = pOfG(bs, qs, G1) - log10p2 = pOfG(bs, qs, G2) - log10p3 = pOfG(bs, qs, G3) - Qsample = phred(1 - 10^log10p2 / sum(10^(c(log10p1, log10p2, log10p3)))) - - return(list(p1=log10p1, p2=log10p2, p3=log10p3, Qsample=Qsample)) -} - -QsampleExpected <- function(depth, Q) { - weightedAvg = 0 - for ( d in 1:(depth*3) ) { - Qsample = 0 - pOfD = dpois(d, depth) - for ( nBs in 0:d ) { - pOfnB = dbinom(nBs, d, 0.5) - nAs = d - nBs - Qsample = pOfGs(nAs, nBs, Q)$Qsample - #Qsample = 1 - weightedAvg = weightedAvg + Qsample * pOfD * pOfnB - print(as.data.frame(list(d=d, nBs = nBs, pOfD=pOfD, pOfnB = pOfnB, Qsample=Qsample, weightedAvg = weightedAvg))) - } - } - - return(weightedAvg) -} - -plotQsamples <- function(depths, Qs, Qmax) { - cols = rainbow(length(Qs)) - plot(depths, rep(Qmax, length(depths)), type="n", ylim=c(0,Qmax), xlab="Average sequencing coverage", ylab="Qsample", main = "Expected Qsample values, including depth and allele sampling") - - for ( i in 1:length(Qs) ) { - Q = Qs[i] - y = as.numeric(lapply(depths, function(x) QsampleExpected(x, Q))) - points(depths, y, col=cols[i], type="b") - } - - legend("topleft", paste("Q", Qs), fill=cols) -} - -pCallHetGivenDepth <- function(depth, nallelesToCall) { - depths = 0:(2*depth) - pNoAllelesToCall = apply(as.matrix(depths),1,function(d) sum(dbinom(0:nallelesToCall,d,0.5))) - dpois(depths,depth)*(1-pNoAllelesToCall) -} - -pCallHets <- function(depth, nallelesToCall) { - sum(pCallHetGivenDepth(depth,nallelesToCall)) -} - -pCallHetMultiSample <- function(depth, nallelesToCall, nsamples) { - 1-(1-pCallHets(depth,nallelesToCall))^nsamples -} From bcf80cc7b3bd527a3f0eab6f863e4c75fcec72dd Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Mar 2012 21:14:44 -0400 Subject: [PATCH 15/29] Cleanup in VariantEval. Example of molten VariantEval output -- Moved a variety of useful formatting routines for ratios, percentages, etc, into VariantEvalator.java so everyone can share. Code updated to use these routines where appropriate -- Added variantWasSingleton() to VariantEvaluator, which can be used to determine if a site, even after subsetting to specific samples, was a singleton in the original full VCF -- TableType, which used to be an interface, is now an abstract class, allowing us to implement some generally functionality and avoid duplication. -- This included creating a getRowName() function that used to be hardcoded as "row" but how can be overridden. -- #### This allows us implement molten tables, which are vastly easier to use than multi-row data sets. See IndelHistogram class (in later commit) for example of molten VE output --- .../varianteval/VariantEvalWalker.java | 31 ++++---------- .../varianteval/evaluators/CountVariants.java | 12 +++--- .../evaluators/GenotypeConcordance.java | 8 ++-- .../evaluators/GenotypePhasingEvaluator.java | 2 +- .../evaluators/MultiallelicSummary.java | 17 ++------ .../evaluators/VariantEvaluator.java | 42 +++++++++++++++++++ .../evaluators/VariantQualityScore.java | 12 +----- .../evaluators/VariantSummary.java | 4 +- .../walkers/varianteval/util/TableType.java | 12 +++--- .../varianteval/util/VariantEvalUtils.java | 2 +- 10 files changed, 75 insertions(+), 67 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index d18c7e10a..3a67fd5d7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -93,6 +93,7 @@ import java.util.*; */ @Reference(window=@Window(start=-50, stop=50)) public class VariantEvalWalker extends RodWalker implements TreeReducible { + public static final String IS_SINGLETON_KEY = "ISSINGLETON"; @Output protected PrintStream out; @@ -494,7 +495,7 @@ public class VariantEvalWalker extends RodWalker implements Tr if (field.get(ve) instanceof TableType) { TableType t = (TableType) field.get(ve); - String subTableName = ve.getClass().getSimpleName() + "." + field.getName(); + final String subTableName = ve.getClass().getSimpleName() + "." + field.getName(); final DataPoint dataPointAnn = datamap.get(field); GATKReportTable table; @@ -509,17 +510,10 @@ public class VariantEvalWalker extends RodWalker implements Tr table.addColumn(vs.getName(), "unknown"); } - table.addColumn("row", "unknown"); - - for ( Object o : t.getColumnKeys() ) { - String c; - - if (o instanceof String) { - c = (String) o; - } else { - c = o.toString(); - } + table.addColumn(t.getRowName(), "unknown"); + for ( final Object o : t.getColumnKeys() ) { + final String c = o.toString(); table.addColumn(c, 0.0); } } else { @@ -527,7 +521,7 @@ public class VariantEvalWalker extends RodWalker implements Tr } for (int row = 0; row < t.getRowKeys().length; row++) { - String r = (String) t.getRowKeys()[row]; + final String r = t.getRowKeys()[row].toString(); for ( VariantStratifier vs : stratificationObjects ) { final String columnName = vs.getName(); @@ -535,17 +529,10 @@ public class VariantEvalWalker extends RodWalker implements Tr } for (int col = 0; col < t.getColumnKeys().length; col++) { - String c; - if (t.getColumnKeys()[col] instanceof String) { - c = (String) t.getColumnKeys()[col]; - } else { - c = t.getColumnKeys()[col].toString(); - } - - String newStateKey = stateKey.toString() + r; + final String c = t.getColumnKeys()[col].toString(); + final String newStateKey = stateKey.toString() + r; table.set(newStateKey, c, t.getCell(row, col)); - - table.set(newStateKey, "row", r); + table.set(newStateKey, t.getRowName(), r); } } } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index 9a97b005c..6fc4208ee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -30,7 +30,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval { @DataPoint(description = "Number of variants per base", format = "%.8f") public double variantRatePerBp = 0; - @DataPoint(description = "Number of snp loci", format = "%d") public long nSNPs = 0; @DataPoint(description = "Number of mnp loci", format = "%d") @@ -47,7 +46,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval { @DataPoint(description = "Number of mixed loci (loci that can't be classified as a SNP, Indel or MNP)", format = "%d") public long nMixed = 0; - @DataPoint(description = "Number of no calls loci", format = "%d") public long nNoCalls = 0; @DataPoint(description = "Number of het loci", format = "%d") @@ -72,8 +70,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval { public double indelRate = 0; @DataPoint(description = "indel rate per base pair", format = "%.2f") public double indelRatePerBp = 0; - @DataPoint(description = "deletion to insertion ratio", format = "%.2f") - public double deletionInsertionRatio = 0; + @DataPoint(description = "insertion to deletion ratio", format = "%.2f") + public double insertionDeletionRatio = 0; private double perLocusRate(long n) { return rate(n, nProcessedLoci); @@ -113,12 +111,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval { case SNP: nVariantLoci++; nSNPs++; - if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++; + if (variantWasSingleton(vc1)) nSingletons++; break; case MNP: nVariantLoci++; nMNPs++; - if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++; + if (variantWasSingleton(vc1)) nSingletons++; break; case INDEL: nVariantLoci++; @@ -201,6 +199,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval { hetHomRatio = ratio(nHets, nHomVar); indelRate = perLocusRate(nDeletions + nInsertions + nComplex); indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions + nComplex); - deletionInsertionRatio = ratio(nDeletions, nInsertions); + insertionDeletionRatio = ratio(nInsertions, nDeletions); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java index 4f5aeed61..75aacf5ba 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java @@ -59,7 +59,7 @@ public class GenotypeConcordance extends VariantEvaluator { private boolean discordantInteresting = false; - static class FrequencyStats implements TableType { + static class FrequencyStats extends TableType { class Stats { public Stats(int found, int missed) { nFound = found; nMissed = missed; } public long nFound = 0; @@ -103,7 +103,7 @@ public class GenotypeConcordance extends VariantEvaluator { } } - static class QualityScoreHistograms implements TableType { + static class QualityScoreHistograms extends TableType { final static int NUM_BINS = 20; final HashMap truePositiveQualityScoreMap = new HashMap(); // A HashMap holds all the quality scores until we are able to bin them appropriately final HashMap falsePositiveQualityScoreMap = new HashMap(); @@ -362,7 +362,7 @@ public class GenotypeConcordance extends VariantEvaluator { /** * a table of sample names to genotype concordance figures */ -class SampleStats implements TableType { +class SampleStats extends TableType { private final int nGenotypeTypes; // sample to concordance stats object @@ -448,7 +448,7 @@ class SampleStats implements TableType { /** * a table of sample names to genotype concordance summary statistics */ -class SampleSummaryStats implements TableType { +class SampleSummaryStats extends TableType { protected final static String ALL_SAMPLES_KEY = "allSamples"; protected final static String[] COLUMN_KEYS = new String[]{ "percent_comp_ref_called_ref", diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java index f4369401b..2f9671d90 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java @@ -376,7 +376,7 @@ class PhaseStats { /** * a table of sample names to genotype phasing statistics */ -class SamplePhasingStatistics implements TableType { +class SamplePhasingStatistics extends TableType { private HashMap sampleStats = null; private double minPhaseQuality; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java index 5cea0322f..1c34be4a1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java @@ -87,13 +87,8 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva public String indelNoveltyRate = "NA"; - public void initialize(VariantEvalWalker walker) {} - @Override public boolean enabled() { return true; } - - public int getComparisonOrder() { - return 2; - } + @Override public int getComparisonOrder() { return 2; } public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); @@ -156,12 +151,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva // TODO -- implement me } - private final String noveltyRate(final int all, final int known) { - final int novel = all - known; - final double rate = (novel / (1.0 * all)); - return all == 0 ? "NA" : String.format("%.2f", rate); - } - public void finalizeEvaluation() { processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci; variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs; @@ -170,7 +159,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva TiTvRatio = (double)nTi / (double)nTv; - SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete); - indelNoveltyRate = noveltyRate(nMultiSNPs, knownIndelsPartial + knownIndelsComplete); + SNPNoveltyRate = formattedNoveltyRate(knownSNPsPartial + knownSNPsComplete, nMultiSNPs); + indelNoveltyRate = formattedNoveltyRate(knownIndelsPartial + knownIndelsComplete, nMultiSNPs); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index 83a1c2f3b..7e5cf37ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -49,4 +49,46 @@ public abstract class VariantEvaluator { return true; } + /** + * Returns true if the variant in vc was a singleton in the original input evaluation + * set, regardless of variant context subsetting that has occurred. + * @param eval + * @return true if eval was originally a singleton site + */ + protected static final boolean variantWasSingleton(final VariantContext eval) { + return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false); + } + + /** + * Convenience function that formats the novelty rate as a %.2f string + * + * @param known number of variants from all that are known + * @param all number of all variants + * @return a String novelty rate, or NA if all == 0 + */ + protected static final String formattedNoveltyRate(final int known, final int all) { + return formattedPercent(all - known, all); + } + + /** + * Convenience function that formats the novelty rate as a %.2f string + * + * @param x number of objects part of total that meet some criteria + * @param total count of all objects, including x + * @return a String percent rate, or NA if total == 0 + */ + protected static final String formattedPercent(final int x, final int total) { + return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total)); + } + + /** + * Convenience function that formats a ratio as a %.2f string + * + * @param num number of observations in the numerator + * @param denom number of observations in the denumerator + * @return a String formatted ratio, or NA if all == 0 + */ + protected static final String formattedRatio(final int num, final int denom) { + return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom)); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java index ce9e45c9b..8417faf5f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java @@ -54,7 +54,7 @@ public class VariantQualityScore extends VariantEvaluator { @DataPoint(description = "average variant quality for each allele count") AlleleCountStats alleleCountStats = null; - static class TiTvStats implements TableType { + static class TiTvStats extends TableType { final static int NUM_BINS = 20; final HashMap> qualByIsTransition = new HashMap>(); // A hashMap holds all the qualities until we are able to bin them appropriately final long transitionByQuality[] = new long[NUM_BINS]; @@ -73,10 +73,6 @@ public class VariantQualityScore extends VariantEvaluator { return columnKeys; } - public String getName() { - return "TiTvStats"; - } - public String getCell(int x, int y) { return String.valueOf(titvByQuality[y]); } @@ -143,7 +139,7 @@ public class VariantQualityScore extends VariantEvaluator { } } - class AlleleCountStats implements TableType { + class AlleleCountStats extends TableType { final HashMap> qualityListMap = new HashMap>(); final HashMap qualityMap = new HashMap(); @@ -163,10 +159,6 @@ public class VariantQualityScore extends VariantEvaluator { return new String[]{"alleleCount","avgQual"}; } - public String getName() { - return "AlleleCountStats"; - } - public String getCell(int x, int y) { int iii = 0; for( final Integer key : qualityListMap.keySet() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java index aa3eff756..31f9a4f78 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java @@ -255,9 +255,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { private final String noveltyRate(Type type) { final int all = allVariantCounts.all(type); final int known = knownVariantCounts.all(type); - final int novel = all - known; - final double rate = (novel / (1.0 * all)); - return all == 0 ? "NA" : String.format("%.2f", rate); + return formattedNoveltyRate(known, all); } public void finalizeEvaluation() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/TableType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/TableType.java index 7ffc3e2c8..6ab7d1af3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/TableType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/TableType.java @@ -9,9 +9,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; * * an interface for turning arbritary objects into tables */ -public interface TableType { - public Object[] getRowKeys(); - public Object[] getColumnKeys(); - public Object getCell(int x, int y); - public String getName(); +public abstract class TableType { + public abstract Object[] getRowKeys(); + public abstract Object[] getColumnKeys(); + public abstract Object getCell(int x, int y); + public String getName() { return getClass().getSimpleName(); } + public String getRowName() { return "row"; } + } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 44af9f574..f9e740576 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -310,7 +310,7 @@ public class VariantEvalUtils { final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount(); if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) { - builder.attribute("ISSINGLETON", true); + builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true); } VariantContextUtils.calculateChromosomeCounts(builder, true); From 6df96644d94ccf270ba2dc7838abc61ba4498acf Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Mar 2012 21:18:38 -0400 Subject: [PATCH 16/29] Unified, standard IndelSummary metrics for VariantEval -- Now you always get SNP and indel metrics with VariantEval! -- Includes Number of SNPs, Number of singleton SNPs, Number of Indels, Number of singleton Indels, Percent of indel sites that are multi-allelic, SNP to indel ratio, Singleton SNP to indel ratio, Indel novelty rate, 1 to 2 bp indel ratio, 1 to 3 bp indel ratio, 2 to 3 bp indel ratio, 1 and 2 to 3 bp indel ratio, Frameshift percent, Insertion to deletion ratio, Insertion to deletion ratio for 1 bp events, Number of indels in protein-coding regions labeled as frameshift, Number of indels in protein-coding regions not labeled as frameshift, Het to hom ratio for SNPs, Het to hom ratio for indels, a Histogram of indel lengths, Number of large (>10 bp) deletions, Number of large (>10 bp) insertions, Ratio of large (>10 bp) insertions to deletions -- Updated VE integration tests as appropriate --- .../varianteval/evaluators/IndelSummary.java | 230 ++++++++++++++++++ .../varianteval/util/IndelHistogram.java | 113 +++++++++ .../VariantEvalIntegrationTest.java | 34 +-- 3 files changed, 360 insertions(+), 17 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/IndelHistogram.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java new file mode 100644 index 000000000..51cf2bb6a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.IndelHistogram; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +@Analysis(description = "Evaluation summary for indels") +public class IndelSummary extends VariantEvaluator implements StandardEval { + final protected static Logger logger = Logger.getLogger(IndelSummary.class); + + @DataPoint(description = "Number of SNPs", format = "%d") + public int n_SNPs = 0; + + @DataPoint(description = "Number of singleton SNPs", format = "%d") + public int n_singleton_SNPs = 0; + + @DataPoint(description = "Number of Indels", format = "%d") + public int n_indels = 0; + + // Number of Indels Sites (counts one for any number of alleles at site) + public int nIndelSites = 0; + + @DataPoint(description = "Number of singleton Indels", format = "%d") + public int n_singleton_indels = 0; + + // counts 1 for each site where the number of alleles > 2 + public int nMultiIndelSites = 0; + + @DataPoint(description = "Percent of indel sites that are multi-allelic") + public String percent_of_sites_with_more_than_2_alleles; + + @DataPoint(description = "SNP to indel ratio") + public String SNP_to_indel_ratio; + + @DataPoint(description = "Singleton SNP to indel ratio") + public String SNP_to_indel_ratio_for_singletons; + + @DataPoint(description = "Indel novelty rate") + public String indel_novelty_rate; + + @DataPoint(description = "1 to 2 bp indel ratio") + public String ratio_of_1_to_2_bp_indels; + + @DataPoint(description = "1 to 3 bp indel ratio") + public String ratio_of_1_to_3_bp_indels; + + @DataPoint(description = "2 to 3 bp indel ratio") + public String ratio_of_2_to_3_bp_indels; + + @DataPoint(description = "1 and 2 to 3 bp indel ratio") + public String ratio_of_1_and_2_to_3_bp_indels; + + @DataPoint(description = "Frameshift percent") + public String frameshift_rate_for_coding_indels; + + // + // insertions to deletions + // + @DataPoint(description = "Insertion to deletion ratio") + public String insertion_to_deletion_ratio; + + @DataPoint(description = "Insertion to deletion ratio for 1 bp events") + public String insertion_to_deletion_ratio_for_1bp_indels; + + // + // Frameshifts + // + @DataPoint(description = "Number of indels in protein-coding regions labeled as frameshift") + public int n_coding_indels_frameshifting = 0; + + @DataPoint(description = "Number of indels in protein-coding regions not labeled as frameshift") + public int n_coding_indels_in_frame = 0; + + // + // Het : hom ratios + // + @DataPoint(description = "Het to hom ratio for SNPs") + public String SNP_het_to_hom_ratio; + + @DataPoint(description = "Het to hom ratio for indels") + public String indel_het_to_hom_ratio; + + int nSNPHets = 0, nSNPHoms = 0, nIndelHets = 0, nIndelHoms = 0; + + int nKnownIndels = 0, nInsertions = 0; + int n1bpInsertions = 0, n1bpDeletions = 0; + int[] countByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used + + public final static int MAX_SIZE_FOR_HISTOGRAM = 10; + @DataPoint(description = "Histogram of indel lengths") + IndelHistogram lengthHistogram = new IndelHistogram(MAX_SIZE_FOR_HISTOGRAM, true); + + @DataPoint(description = "Number of large (>10 bp) deletions") + public int n_large_deletions = 0; + + @DataPoint(description = "Number of large (>10 bp) insertions") + public int n_large_insertions = 0; + + @DataPoint(description = "Ratio of large (>10 bp) insertions to deletions") + public String insertion_to_deletion_ratio_for_large_indels; + + @Override public boolean enabled() { return true; } + @Override public int getComparisonOrder() { return 2; } + + public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( eval == null || eval.isMonomorphicInSamples() ) + return null; + + // update counts + switch ( eval.getType() ) { + case SNP: + n_SNPs += eval.getNAlleles() - 1; // -1 for ref + if ( variantWasSingleton(eval) ) n_singleton_SNPs++; + + // collect information about het / hom ratio + for ( final Genotype g : eval.getGenotypes() ) { + if ( g.isHet() ) nSNPHets++; + if ( g.isHomVar() ) nSNPHoms++; + } + break; + case INDEL: + if ( eval.isComplexIndel() ) break; // don't count complex substitutions + + nIndelSites++; + if ( ! eval.isBiallelic() ) nMultiIndelSites++; + if ( variantWasSingleton(eval) ) n_singleton_indels++; + + // collect information about het / hom ratio + for ( final Genotype g : eval.getGenotypes() ) { + if ( g.isHet() ) nIndelHets++; + if ( g.isHomVar() ) nIndelHoms++; + } + + for ( Allele alt : eval.getAlternateAlleles() ) { + n_indels++; // +1 for each alt allele + + if ( comp != null ) nKnownIndels++; // TODO -- make this test allele specific? + + // ins : del ratios + final int alleleSize = alt.length() - eval.getReference().length(); + if ( alleleSize == 0 ) throw new ReviewedStingException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference()); + if ( alleleSize > 0 ) nInsertions++; + if ( alleleSize == 1 ) n1bpInsertions++; + if ( alleleSize == -1 ) n1bpDeletions++; + + // update the length histogram + lengthHistogram.update(eval.getReference(), alt); + + // requires snpEFF annotations + if ( eval.getAttributeAsString("SNPEFF_GENE_BIOTYPE", "missing").equals("protein_coding") ) { + final String effect = eval.getAttributeAsString("SNPEFF_EFFECT", "missing"); + if ( effect.equals("missing") ) + throw new ReviewedStingException("Saw SNPEFF_GENE_BIOTYPE but unexpected no SNPEFF_EFFECT at " + eval); + if ( effect.equals("FRAME_SHIFT") ) + n_coding_indels_frameshifting++; + else if ( effect.startsWith("CODON") ) + n_coding_indels_in_frame++; + else + ; // lots of protein coding effects that shouldn't be counted, such as INTRON + } + + // update the baby histogram + final int absSize = Math.abs(alleleSize); + if ( absSize < countByLength.length ) countByLength[absSize]++; + } + + break; + default: + throw new UserException.BadInput("Unexpected variant context type: " + eval); + } + + return null; // we don't capture any interesting sites + } + + public void finalizeEvaluation() { + percent_of_sites_with_more_than_2_alleles = formattedRatio(nMultiIndelSites, nIndelSites); + SNP_to_indel_ratio = formattedRatio(n_SNPs, n_indels); + SNP_to_indel_ratio_for_singletons = formattedRatio(n_singleton_SNPs, n_singleton_indels); + indel_novelty_rate = formattedNoveltyRate(nKnownIndels, n_indels); + ratio_of_1_to_2_bp_indels = formattedRatio(countByLength[1], countByLength[2]); + ratio_of_1_to_3_bp_indels = formattedRatio(countByLength[1], countByLength[3]); + ratio_of_2_to_3_bp_indels = formattedRatio(countByLength[2], countByLength[3]); + ratio_of_1_and_2_to_3_bp_indels = formattedRatio(countByLength[1] + countByLength[2], countByLength[3]); + frameshift_rate_for_coding_indels = formattedPercent(n_coding_indels_frameshifting, n_coding_indels_in_frame + n_coding_indels_frameshifting); + + SNP_het_to_hom_ratio = formattedRatio(nSNPHets, nSNPHoms); + indel_het_to_hom_ratio = formattedRatio(nIndelHets, nIndelHoms); + + n_large_deletions = lengthHistogram.getnTooBigDeletions(); + n_large_insertions = lengthHistogram.getnTooBigInsertions(); + + insertion_to_deletion_ratio = formattedRatio(nInsertions, n_indels - nInsertions); + insertion_to_deletion_ratio_for_1bp_indels = formattedRatio(n1bpInsertions, n1bpDeletions); + insertion_to_deletion_ratio_for_large_indels = formattedRatio(n_large_insertions, n_large_deletions); + + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/IndelHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/IndelHistogram.java new file mode 100644 index 000000000..a6c86d3da --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/IndelHistogram.java @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval.util; + +import org.broadinstitute.sting.utils.variantcontext.Allele; + +import java.util.*; + +/** + * Simple utility for histogramming indel lengths + * + * Based on code from chartl + * + * @author Mark DePristo + * @since 3/21/12 + */ +public class IndelHistogram extends TableType { + private final boolean asFrequencies; + int nIndels = 0, nTooBigDeletions = 0, nTooBigInsertions = 0; + private final Integer[] rowKeys; + + private Map frequencies = null; + private final Map counts = new HashMap(); + + public IndelHistogram(int maxSize, boolean asFrequencies) { + this.asFrequencies = asFrequencies; + initializeCounts(maxSize); + this.rowKeys = new ArrayList(counts.keySet()).toArray(new Integer[maxSize]); + } + + private void initializeCounts(int size) { + for ( int i = -size; i <= size; i++ ) { + if ( i != 0 ) counts.put(i, 0); + } + } + + @Override + public String getRowName() { + return "Length"; + } + + @Override + public Object[] getColumnKeys() { + return new String[]{"Count"}; + } + + @Override + public Object[] getRowKeys() { + return rowKeys; + } + + @Override + public Object getCell(int row, int col) { + final int key = (Integer)getRowKeys()[row]; + if ( asFrequencies ) { + if ( frequencies == null ) { + frequencies = new HashMap(); + for ( final int len : counts.keySet() ) { + final double value = nIndels == 0 ? 0.0 : counts.get(len) / (1.0 * nIndels); + frequencies.put(len, value); + } + } + return frequencies.get(key); + } + return counts.get(key); + } + + public int getnTooBigDeletions() { + return nTooBigDeletions; + } + + public int getnTooBigInsertions() { + return nTooBigInsertions; + } + + public void update(final Allele ref, final Allele alt) { + final int alleleSize = alt.length() - ref.length(); + update(alleleSize); + } + + public void update(int len) { + if ( counts.containsKey(len) ) { + nIndels++; + counts.put(len, counts.get(len) + 1); + } else if ( len < 0 ) { + nTooBigDeletions++; + } else { + nTooBigInsertions++; + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 9f69554fe..610733d9c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -94,7 +94,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("94fb8cba9e236131c6fbf1d7fee738fe") + Arrays.asList("7a726ecbedd722fa7cd4de3e023b7a82") ); executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); } @@ -115,7 +115,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("670979268b05c3024297ba98d67d89ab") + Arrays.asList("95bb4a4267a8f29dd7a8169561499f20") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -137,7 +137,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("c38ce9c872a76ae7dd26c3e353bf0765") + Arrays.asList("9b51029083495935823fb0447a2857b9") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } @@ -158,7 +158,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("2c37f23bf6114a2b27f21ed445806fd2") + Arrays.asList("318b5fbbc61e2fc11d49369359812edd") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); } @@ -179,7 +179,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("206f0d629de9af0b97340cb22d34a81b") + Arrays.asList("74c02df2ef69dda231a2aec2a948747b") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); } @@ -200,7 +200,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("bd869725429deae8f56175ba9a8ab390") + Arrays.asList("2d97b1fe15e532e89803ba7ba347ff20") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); } @@ -221,7 +221,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("9c7f6783a57ad681bb754b5e71de27dc") + Arrays.asList("474cbc231ddbc4ba299ffe61a17405b6") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); } @@ -244,7 +244,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("a2d280440aa3771937f3d2d10f1eea74") + Arrays.asList("2cc9bc4bbe8b4edb6dc27642ec41f66e") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); } @@ -269,7 +269,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("2925d811dd521beb00059f8c8e818d83") + Arrays.asList("00c94cf3e14bc2855d39bbefa27f9bb2") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); } @@ -288,7 +288,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("4b79bf2dfd73ddac0ceb0838a352bf9a") + Arrays.asList("a0c0d4805db1245aa30a306aa506096f") ); executeTest("testFundamentalsCountVariantsNoCompRod", spec); } @@ -301,7 +301,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("1739654de350541edf429888b708ae01")); + 1, Arrays.asList("2192418a70a8e018a1675d4f425155f3")); executeTestParallel("testSelect1", spec); } @@ -329,7 +329,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d57cf846bc26d338edcf181fb0c85535")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("2282523336c24d434d1cc0eb1697b4f9")); executeTestParallel("testCompVsEvalAC",spec); } @@ -359,7 +359,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --dbsnp " + b37dbSNP132 + " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("b663745a39f62bfa5b5d486811cf57ec")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ec321fcc424fbad74a4a74e739173d03")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -371,7 +371,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("f1e1b1469dca86d72ae79a2d3e10612c")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ccaea6245086552cd63f828eabddfaf3")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -449,7 +449,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("0c632b5be8a54e43afa576510b40c4da") + Arrays.asList("9954c769ef37c47d3b61481ab0807be0") ); executeTest("testAlleleCountStrat", spec); } @@ -470,7 +470,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("92404820a94e7cfb854ae73450a0fbd9") + Arrays.asList("c0d69ce7647a575d166d8bab5aa16299") ); executeTest("testIntervalStrat", spec); } @@ -487,7 +487,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("417875ab1924b7e7950fa10daee393d2") + Arrays.asList("9a8ffb506118c1bde6f7bfadc4fb6f10") ); executeTest("testModernVCFWithLargeIndels", spec); } From fee8d86f63dd517f59841a31640f32108c4be243 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Mar 2012 22:13:24 -0400 Subject: [PATCH 18/29] VariantEval optimization -- Use a LinkedHashMap not a TreeMap so iteration is faster. -- Note that with a lot of stratifications the update0 is taking up a lot of time. For example, with 822 samples and functional class and sample on there are 100K contexts and 30% of the runtime is just in the update0 call --- .../varianteval/VariantEvalWalker.java | 2 +- .../util/NewEvaluationContext.java | 28 ++++++------------- .../varianteval/util/VariantEvalUtils.java | 2 +- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 3a67fd5d7..4fc7a1f41 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -326,7 +326,7 @@ public class VariantEvalWalker extends RodWalker implements Tr */ @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - for ( NewEvaluationContext nec : evaluationContexts.values() ) { + for ( final NewEvaluationContext nec : evaluationContexts.values() ) { synchronized (nec) { nec.update0(tracker, ref, context); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java index c34e44516..f9d8e437b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java @@ -10,29 +10,17 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.HashMap; -import java.util.Set; -import java.util.TreeMap; +import java.util.*; public class NewEvaluationContext extends HashMap { - public TreeMap evaluationInstances; - - public String toString() { - String value = ""; - - for ( VariantStratifier key : this.keySet() ) { - value += "\t" + key.getName() + ":" + this.get(key) + "\n"; - } - - return value; - } + private Map evaluationInstances; public void addEvaluationClassList(VariantEvalWalker walker, StateKey stateKey, Set> evaluationClasses) { - evaluationInstances = new TreeMap(); + evaluationInstances = new LinkedHashMap(evaluationClasses.size()); - for ( Class c : evaluationClasses ) { + for ( final Class c : evaluationClasses ) { try { - VariantEvaluator eval = c.newInstance(); + final VariantEvaluator eval = c.newInstance(); eval.initialize(walker); if (eval.stateIsApplicable(stateKey)) { @@ -47,11 +35,11 @@ public class NewEvaluationContext extends HashMap { } public TreeMap getEvaluationClassList() { - return evaluationInstances; + return new TreeMap(evaluationInstances); } public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) { - for ( VariantEvaluator evaluation : evaluationInstances.values() ) { + for ( final VariantEvaluator evaluation : evaluationInstances.values() ) { // we always call update0 in case the evaluation tracks things like number of bases covered // the other updateN methods don't see a null context @@ -79,7 +67,7 @@ public class NewEvaluationContext extends HashMap { } public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - for ( VariantEvaluator evaluation : evaluationInstances.values() ) { + for ( final VariantEvaluator evaluation : evaluationInstances.values() ) { evaluation.update0(tracker, ref, context); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index f9e740576..91c7140e6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -191,7 +191,7 @@ public class VariantEvalUtils { * @return a map of all the evaluation contexts */ public HashMap initializeEvaluationContexts(Set stratificationObjects, Set> evaluationObjects, Stack stratStack, NewEvaluationContext ec) { - HashMap ecs = new HashMap(); + HashMap ecs = new LinkedHashMap(); if (stratStack == null) { stratStack = new Stack(); From ab288354e9a6e9c726e285bcfafc103476eec16b Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Fri, 23 Mar 2012 10:47:01 -0400 Subject: [PATCH 20/29] Better error message for malformed input recal file. --- .../gatk/walkers/variantrecalibration/ApplyRecalibration.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index fac5b22aa..6b36f4e1b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -184,6 +184,8 @@ public class ApplyRecalibration extends RodWalker { } } catch ( FileNotFoundException e ) { throw new UserException.CouldNotReadInputFile(RECAL_FILE, e); + } catch ( Exception e ) { + throw new UserException.MalformedFile(RECAL_FILE, "Could not parse LOD and annotation information in input recal file. File is somehow malformed."); } } From ff26f2bf688048bbd6e2b9ffcf31cedce4fa99dd Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Mar 2012 11:27:10 -0400 Subject: [PATCH 21/29] HierarchicalMicroScheduler no longer attempts to wrap exceptions -- This behavior, which isn't obviously valuable at all, continued to grab and rethrow exceptions in the HMS that, if run without NT, would show up as more meaningful errors. Now HMS simply checks whether the throwable it received on error was a RuntimeException. If so, it is stored and rethrow without wrapping later. If it isn't, only in this case is the exception wrapped in a ReviewedStingException. -- Added a QC walker ErrorThrowingWalker that will throw a UserException, ReviewedStingException, and NullPointerException from map as specified on the command line -- Added IT that ensures that all three types are thrown properly (i.e., you catch a NullPointerException when you ask for one to be thrown) with and without threading enabled. -- I believe this will finally put to rest all of these annoying HMS captures. --- .../executive/HierarchicalMicroScheduler.java | 18 ++--- .../gatk/walkers/qc/ErrorThrowingWalker.java | 72 +++++++++++++++++++ .../gatk/EngineFeaturesIntegrationTest.java | 45 ++++++++++++ 3 files changed, 126 insertions(+), 9 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowingWalker.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 433c7d82f..1cea14a9d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -47,7 +47,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar /** * An exception that's occurred in this traversal. If null, no exception has occurred. */ - private Throwable error = null; + private RuntimeException error = null; /** * Queue of incoming shards. @@ -345,22 +345,22 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar return error != null; } - private synchronized StingException getTraversalError() { + private synchronized RuntimeException getTraversalError() { if(!hasTraversalErrorOccurred()) throw new ReviewedStingException("User has attempted to retrieve a traversal error when none exists"); - - // If the error is already a StingException, pass it along as is. Otherwise, wrap it. - if(error instanceof StingException) - return (StingException)error; - else - return new ReviewedStingException("An error occurred during the traversal.",error); + return error; } /** * Allows other threads to notify of an error during traversal. */ protected synchronized void notifyOfTraversalError(Throwable error) { - this.error = error; + // If the error is already a Runtime, pass it along as is. Otherwise, wrap it. + if (error instanceof RuntimeException) + this.error = (RuntimeException)error; + else + this.error = new ReviewedStingException("An error occurred during the traversal.", error); + } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowingWalker.java new file mode 100644 index 000000000..6f0490fd3 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowingWalker.java @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.qc; + +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + +/** + * a walker that simply throws errors. Allows us to test that the engine is behaving as expected with error handling + */ +public class ErrorThrowingWalker extends RodWalker implements TreeReducible { + @Input(fullName="exception", shortName = "E", doc="Java class of exception to throw", required=true) + public String exceptionToThrow; + + // + // Template code to allow us to build the walker, doesn't actually do anything + // + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( exceptionToThrow.equals("UserException") ) { + throw new UserException("UserException"); + } else if ( exceptionToThrow.equals("NullPointerException") ) { + throw new NullPointerException(); + } else if ( exceptionToThrow.equals("ReviewedStingException") ) { + throw new ReviewedStingException("ReviewedStingException"); + } else { + throw new UserException.BadArgumentValue("exception", "exception isn't a recognized value " + exceptionToThrow); + } + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + public Integer treeReduce(final Integer lhs, final Integer rhs) { + return lhs + rhs; + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index 8cc2ffd96..192c86fe3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -25,9 +25,13 @@ package org.broadinstitute.sting.gatk; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.Arrays; + /** * */ @@ -73,4 +77,45 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { @Test() private void testMissingInterval() { testMissingFile("missing interval", "-T UnifiedGenotyper -L missing.interval_list -I " + b37GoodBAM); } + + + // -------------------------------------------------------------------------------- + // + // Test that our exceptions are coming back as we expect + // + // -------------------------------------------------------------------------------- + + private class EngineErrorHandlingTestProvider extends TestDataProvider { + Class expectedException; + boolean multiThreaded; + + public EngineErrorHandlingTestProvider(Class exceptedException, final boolean multiThreaded) { + super(EngineErrorHandlingTestProvider.class); + this.expectedException = exceptedException; + this.multiThreaded = multiThreaded; + setName(String.format("Engine error handling: expected %s, is-multithreaded %b", exceptedException, multiThreaded)); + } + } + + @DataProvider(name = "EngineErrorHandlingTestProvider") + public Object[][] makeEngineErrorHandlingTestProvider() { + for ( final boolean multiThreaded : Arrays.asList(true, false)) { + new EngineErrorHandlingTestProvider(NullPointerException.class, multiThreaded); + new EngineErrorHandlingTestProvider(UserException.class, multiThreaded); + new EngineErrorHandlingTestProvider(ReviewedStingException.class, multiThreaded); + } + + return EngineErrorHandlingTestProvider.getTests(EngineErrorHandlingTestProvider.class); + } + + // + // Loop over errors to throw, make sure they are the errors we get back from the engine, regardless of NT type + // + @Test(dataProvider = "EngineErrorHandlingTestProvider") + public void testEngineErrorHandlingTestProvider(EngineErrorHandlingTestProvider cfg) { + final String root = "-T ErrorThrowing -R " + b37KGReference; + final String args = root + (cfg.multiThreaded ? " -nt 2" : "") + " -E " + cfg.expectedException.getSimpleName(); + WalkerTestSpec spec = new WalkerTestSpec(args, 0, cfg.expectedException); + executeTest(cfg.toString(), spec); + } } \ No newline at end of file From d3f2bc43613971d3d9c1adb65c451270cea571ce Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 23 Mar 2012 13:51:00 -0400 Subject: [PATCH 22/29] Pre-allocate 10 alt alleles worth of PLs in the cache for efficiency. This effectively means that we never need to re-allocate the cache in the future because we can't ever really handle that many alt alleles. --- .../sting/utils/variantcontext/GenotypeLikelihoods.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java index ae3dff9ed..1f3aead07 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java @@ -222,7 +222,10 @@ public class GenotypeLikelihoods { /* * a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles */ - private static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = new GenotypeLikelihoodsAllelePair[]{ new GenotypeLikelihoodsAllelePair(0, 0) }; + private static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex; + static { + calculatePLcache(65); // start with data for 10 alternate alleles + } private static void calculatePLcache(final int minIndex) { // how many alternate alleles do we need to calculate for? @@ -266,6 +269,7 @@ public class GenotypeLikelihoods { */ public static GenotypeLikelihoodsAllelePair getAllelePair(final int PLindex) { // make sure that we've cached enough data + // TODO -- this is not thread-safe! if ( PLindex >= PLIndexToAlleleIndex.length ) calculatePLcache(PLindex); From 539da9e3e1341e3866c421e9afd1e6eb8d66a0a0 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 23 Mar 2012 13:54:46 -0400 Subject: [PATCH 24/29] Fixing GATKReport exception handling when loading a report * allowing tables with no description to go through * GATKReportTable should be more lenient with the format requirements (added to-dos for roger) --- .../sting/gatk/report/GATKReport.java | 53 +++--- .../sting/gatk/report/GATKReportTable.java | 172 ++++++++++-------- 2 files changed, 120 insertions(+), 105 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index ff0c39f41..551d9eff8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -28,10 +28,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.PrintStream; +import java.io.*; import java.util.Collection; import java.util.TreeMap; @@ -85,36 +82,32 @@ public class GATKReport { * @param file the file to load */ private void loadReport(File file) { + BufferedReader reader; + String reportHeader; try { - BufferedReader reader = new BufferedReader(new FileReader(file)); - - String reportHeader = reader.readLine(); - - // Read the first line for the version and number of tables. - version = GATKReportVersion.fromHeader(reportHeader); - if (version.equals(GATKReportVersion.V0_1) || - version.equals(GATKReportVersion.V0_2)) - throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer."); - - int nTables = Integer.parseInt(reportHeader.split(":")[2]); - - // Read each tables according ot the number of tables - for (int i = 0; i < nTables; i++) { - addTable(new GATKReportTable(reader, version)); - - /* - if ( !blankLine.equals("") ) { - throw new StingException("The GATK Report File is corrupted or not formatted correctly"); - } - */ - } + reader = new BufferedReader(new FileReader(file)); + reportHeader = reader.readLine(); + } catch (FileNotFoundException e) { + throw new ReviewedStingException("Could not open file : " + file); + } catch (IOException e) { + throw new ReviewedStingException("Could not read file : " + file); + } - } catch (Exception e) { - // todo - improve exception handling - //throw new StingException("Cannot read GATKReport: " + e); - e.printStackTrace(); + // Read the first line for the version and number of tables. + version = GATKReportVersion.fromHeader(reportHeader); + if (version.equals(GATKReportVersion.V0_1) || + version.equals(GATKReportVersion.V0_2)) + throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer."); + + int nTables = Integer.parseInt(reportHeader.split(":")[2]); + + // Read each tables according ot the number of tables + for (int i = 0; i < nTables; i++) { + addTable(new GATKReportTable(reader, version)); } + + } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 81d7d7710..e45ffc486 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.io.BufferedReader; +import java.io.IOException; import java.io.PrintStream; import java.util.*; import java.util.regex.Matcher; @@ -54,85 +55,107 @@ public class GATKReportTable { private GATKReportColumns columns; + private static final String COULD_NOT_READ_HEADER = "Could not read the header of this file -- "; + private static final String COULD_NOT_READ_COLUMN_NAMES = "Could not read the column names of this file -- "; + private static final String COULD_NOT_READ_DATA_LINE = "Could not read a data line of this table -- "; + private static final String COULD_NOT_READ_EMPTY_LINE = "Could not read the last empty line of this table -- "; + private static final String OLD_GATK_TABLE_VERSION = "We no longer support older versions of the GATK Tables"; + public GATKReportTable(BufferedReader reader, GATKReportVersion version) { - try { + int counter = 0; - int counter = 0; - - switch (version) { - case V1_0: - int nHeaders = 2; - String[] tableHeaders = new String[nHeaders]; - - // Read in the headers - for (int i = 0; i < nHeaders; i++) { + switch (version) { + case V1_0: + int nHeaders = 2; + String[] tableHeaders = new String[nHeaders]; + + // Read in the headers + for (int i = 0; i < nHeaders; i++) { + try { tableHeaders[i] = reader.readLine(); + } catch (IOException e) { + throw new ReviewedStingException(COULD_NOT_READ_HEADER + e.getMessage()); } - String[] tableData = tableHeaders[0].split(":"); - String[] userData = tableHeaders[1].split(":"); - - // Fill in the fields - tableName = userData[2]; - tableDescription = userData[3]; - primaryKeyDisplay = Boolean.parseBoolean(tableData[2]); - columns = new GATKReportColumns(); - - int nColumns = Integer.parseInt(tableData[3]); - int nRows = Integer.parseInt(tableData[4]); - - - // Read column names - String columnLine = reader.readLine(); - - List columnStarts = TextFormattingUtils.getWordStarts(columnLine); - String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts); - - if (primaryKeyDisplay) { - addPrimaryKey(columnNames[0]); - - } else { - sortByPrimaryKey = true; - addPrimaryKey("id", false); - counter = 1; + } + String[] tableData = tableHeaders[0].split(":"); + String[] userData = tableHeaders[1].split(":"); + + // Fill in the fields + tableName = userData[2]; + tableDescription = (userData.length <= 3) ? "" : userData[3]; // table may have no description! (and that's okay) + primaryKeyDisplay = Boolean.parseBoolean(tableData[2]); + columns = new GATKReportColumns(); + + int nColumns = Integer.parseInt(tableData[3]); + int nRows = Integer.parseInt(tableData[4]); + + + // Read column names + String columnLine; + try { + columnLine = reader.readLine(); + } catch (IOException e) { + throw new ReviewedStingException(COULD_NOT_READ_COLUMN_NAMES); + } + + List columnStarts = TextFormattingUtils.getWordStarts(columnLine); + String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts); + + if (primaryKeyDisplay) { + addPrimaryKey(columnNames[0]); + + } else { + sortByPrimaryKey = true; + addPrimaryKey("id", false); + counter = 1; + } + // Put in columns using the format string from the header + for (int i = 0; i < nColumns; i++) { + String format = tableData[5 + i]; + if (primaryKeyDisplay) + addColumn(columnNames[i + 1], true, format); + else + addColumn(columnNames[i], true, format); + } + + for (int i = 0; i < nRows; i++) { + // read line + String dataLine; + try { + dataLine = reader.readLine(); + } catch (IOException e) { + throw new ReviewedStingException(COULD_NOT_READ_DATA_LINE + e.getMessage()); } - // Put in columns using the format string from the header - for (int i = 0; i < nColumns; i++) { - String format = tableData[5 + i]; - if (primaryKeyDisplay) - addColumn(columnNames[i + 1], true, format); - else - addColumn(columnNames[i], true, format); - } - - for (int i = 0; i < nRows; i++) { - // read line - List lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(reader.readLine(), columnStarts)); - - for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) { - - //Input all the remaining values - GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType(); - - if (primaryKeyDisplay) { - String columnName = columnNames[columnIndex + 1]; - String primaryKey = lineSplits.get(0); - set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1))); - } else { - String columnName = columnNames[columnIndex]; - set(counter, columnName, type.Parse(lineSplits.get(columnIndex))); - } - + List lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(dataLine, columnStarts)); + + for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) { + + //Input all the remaining values + GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType(); + + if (primaryKeyDisplay) { + String columnName = columnNames[columnIndex + 1]; + String primaryKey = lineSplits.get(0); + set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1))); + } else { + String columnName = columnNames[columnIndex]; + set(counter, columnName, type.Parse(lineSplits.get(columnIndex))); } - counter++; + } - - + counter++; + } + + + try { reader.readLine(); - // When you see empty line or null, quit out - } - } catch (Exception e) { - //throw new StingException("Cannot read GATKReport: " + e); - e.printStackTrace(); + } catch (IOException e) { + throw new ReviewedStingException(COULD_NOT_READ_EMPTY_LINE + e.getMessage()); + } + break; + + default: + throw new ReviewedStingException(OLD_GATK_TABLE_VERSION); } } @@ -418,12 +441,11 @@ public class GATKReportTable { if (newValue != null) value = newValue; - if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || - column.getDataType().equals(GATKReportDataType.Unknown) ) + // todo -- Types have to be more flexible. For example, %d should accept Integers, Shorts and Bytes. + if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || column.getDataType().equals(GATKReportDataType.Unknown) ) columns.get(columnName).put(primaryKey, value); else - throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", - GATKReportDataType.fromObject(value).name(), column.getDataType().name())); + throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", GATKReportDataType.fromObject(value).name(), column.getDataType().name())); } /** From f421062b5594fd47c39a4a274ae65784c494fded Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 19 Mar 2012 16:04:45 -0400 Subject: [PATCH 25/29] Updated read group covariate to use sample.lane instead of the id Added Unit test. --- .../gatk/walkers/bqsr/ReadGroupCovariate.java | 50 +++++++++++--- .../utils/sam/GATKSAMReadGroupRecord.java | 10 +++ .../sting/utils/sam/ReadUtils.java | 7 ++ .../bqsr/ContextCovariateUnitTest.java | 9 +-- .../walkers/bqsr/CycleCovariateUnitTest.java | 11 +-- .../bqsr/ReadGroupCovariateUnitTest.java | 67 +++++++++++++++++++ 6 files changed, 129 insertions(+), 25 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java index ad4f94f33..eb20f7779 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.BitSetUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Arrays; import java.util.BitSet; import java.util.HashMap; +import java.util.regex.Pattern; /* * Copyright (c) 2009 The Broad Institute @@ -45,6 +47,10 @@ public class ReadGroupCovariate implements RequiredCovariate { private final HashMap readGroupLookupTable = new HashMap(); private final HashMap readGroupReverseLookupTable = new HashMap(); private short nextId = 0; + + private static final String LANE_TAG = "LN"; + private static final String SAMPLE_TAG = "SM"; + // Initialize any member variables using the command-line arguments passed to the walkers @Override @@ -54,14 +60,13 @@ public class ReadGroupCovariate implements RequiredCovariate { @Override public CovariateValues getValues(final GATKSAMRecord read) { final int l = read.getReadLength(); - final String readGroupId = read.getReadGroup().getReadGroupId(); - BitSet rg = bitSetForReadGroup(readGroupId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset + final String readGroupId = readGroupValueFromRG(read.getReadGroup()); + BitSet rg = bitSetForReadGroup(readGroupId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset BitSet[] readGroups = new BitSet[l]; Arrays.fill(readGroups, rg); return new CovariateValues(readGroups, readGroups, readGroups); } - // Used to get the covariate's value from input csv file during on-the-fly recalibration @Override public final Object getValue(final String str) { return str; @@ -77,15 +82,15 @@ public class ReadGroupCovariate implements RequiredCovariate { return bitSetForReadGroup((String) key); } - public final String decodeReadGroup(final short id) { - return readGroupReverseLookupTable.get(id); - } - @Override public int numberOfBits() { return BitSetUtils.numberOfBitsToRepresent(Short.MAX_VALUE); } - + + private String decodeReadGroup(final short id) { + return readGroupReverseLookupTable.get(id); + } + private BitSet bitSetForReadGroup(String readGroupId) { short shortId; if (readGroupLookupTable.containsKey(readGroupId)) @@ -98,6 +103,35 @@ public class ReadGroupCovariate implements RequiredCovariate { } return BitSetUtils.bitSetFrom(shortId); } + + /** + * Gather the sample and lane information from the read group record and return sample.lane + * + * If the bam file is missing the lane information, it tries to use the id regex standardized + * by the Broad Institute to extract the lane information + * + * If it fails to find either of the two pieces of information, will return the read group id instead. + * + * @param rg the read group record + * @return sample.lane or id if information is missing. + */ + private String readGroupValueFromRG(GATKSAMReadGroupRecord rg) { + String lane = rg.getLane(); // take the sample's lane from the read group lane tag + String sample = rg.getSample(); // take the sample's name from the read group sample tag + String value = rg.getId(); // initialize the return value with the read group ID in case we can't find the sample or the lane + + if (lane == null) { // if this bam doesn't have the lane annotation in the read group try to take it from the read group id + String [] splitID = rg.getId().split(Pattern.quote(".")); + if (splitID.length > 1) // if the id doesn't follow the BROAD defined regex (PU.LANE), fall back to the read group id + lane = splitID[splitID.length - 1]; // take the lane from the readgroup id + } + + if (sample != null && lane != null) + value = sample + "." + lane; // the read group covariate is sample.lane (where the inforamtion is available) + + return value; + } + } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java index ff7d12f09..df1ff2a0e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java @@ -13,6 +13,8 @@ import org.broadinstitute.sting.utils.NGSPlatform; */ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { + public static String LANE_TAG = "LN"; + // the SAMReadGroupRecord data we're caching private String mSample = null; private String mPlatform = null; @@ -79,4 +81,12 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { return mNGSPlatform; } + + public String getLane() { + return this.getAttribute(LANE_TAG); + } + + public void setLane(String lane) { + this.setAttribute(LANE_TAG, lane); + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 9d731e489..cbb4120dd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -698,6 +698,13 @@ public class ReadUtils { return bases; } + public static GATKSAMRecord createRandomRead(int length) { + byte[] quals = ReadUtils.createRandomReadQuals(length); + byte[] bbases = ReadUtils.createRandomReadBases(length, true); + return ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M"); + } + + public static String prettyPrintSequenceRecords ( SAMSequenceDictionary sequenceDictionary ) { String[] sequenceRecordNames = new String[sequenceDictionary.size()]; int sequenceRecordIndex = 0; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java index 30a9bad3e..2b4cb2ae3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java @@ -1,9 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; @@ -11,7 +9,6 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import java.util.BitSet; -import java.util.Random; /** * @author Mauricio Carneiro @@ -20,22 +17,18 @@ import java.util.Random; public class ContextCovariateUnitTest { ContextCovariate covariate; RecalibrationArgumentCollection RAC; - Random random; @BeforeClass public void init() { RAC = new RecalibrationArgumentCollection(); covariate = new ContextCovariate(); - random = GenomeAnalysisEngine.getRandomGenerator(); covariate.initialize(RAC); } @Test(enabled = true) public void testSimpleContexts() { - byte[] quals = ReadUtils.createRandomReadQuals(10000); - byte[] bbases = ReadUtils.createRandomReadBases(10000, true); - GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M"); + GATKSAMRecord read = ReadUtils.createRandomRead(1000); GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); CovariateValues values = covariate.getValues(read); verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, stringFrom(clippedRead.getReadBases())); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java index 49315672c..d80cddd3e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -10,7 +8,6 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import java.util.BitSet; -import java.util.Random; /** * @author Mauricio Carneiro @@ -19,22 +16,18 @@ import java.util.Random; public class CycleCovariateUnitTest { CycleCovariate covariate; RecalibrationArgumentCollection RAC; - Random random; @BeforeClass public void init() { RAC = new RecalibrationArgumentCollection(); covariate = new CycleCovariate(); - random = GenomeAnalysisEngine.getRandomGenerator(); covariate.initialize(RAC); } @Test(enabled = true) public void testSimpleCycles() { - short readLength = 10; - byte[] quals = ReadUtils.createRandomReadQuals(readLength); - byte[] bbases = ReadUtils.createRandomReadBases(readLength, true); - GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M"); + short readLength = 10; + GATKSAMRecord read = ReadUtils.createRandomRead(readLength); read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID")); read.getReadGroup().setPlatform("illumina"); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java new file mode 100644 index 000000000..6276022d1 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java @@ -0,0 +1,67 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.BitSet; + +/** + * @author Mauricio Carneiro + * @since 3/1/12 + */ +public class ReadGroupCovariateUnitTest { + ReadGroupCovariate covariate; + RecalibrationArgumentCollection RAC; + + @BeforeClass + public void init() { + RAC = new RecalibrationArgumentCollection(); + covariate = new ReadGroupCovariate(); + covariate.initialize(RAC); + } + + @Test(enabled = true) + public void testSingleRecord() { + final String expected = "SAMPLE.1"; + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.ID"); + rg.setSample("SAMPLE"); + rg.setLane("1"); + runTest(rg, expected); + } + + @Test(enabled = true) + public void testMissingLane() { + final String expected = "SAMPLE.7"; + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.7"); + rg.setSample("SAMPLE"); + runTest(rg, expected); + } + + @Test(enabled = true) + public void testMissingSample() { + final String expected = "MY.ID"; + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.ID"); + rg.setLane("1"); + runTest(rg, expected); + } + + private void runTest(GATKSAMReadGroupRecord rg, String expected) { + GATKSAMRecord read = ReadUtils.createRandomRead(10); + read.setReadGroup(rg); + CovariateValues values = covariate.getValues(read); + verifyCovariateArray(values.getMismatches(), expected); + + } + + private void verifyCovariateArray(BitSet[] values, String expected) { + for (BitSet value : values) { + String actual = covariate.keyFromBitSet(value); + Assert.assertEquals(actual, expected); + } + } + +} From 9f74969e3a3255ebd4ef1797a1e7ffac4b173d18 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 19 Mar 2012 18:00:30 -0400 Subject: [PATCH 26/29] BQSR with GATKReport implementation * restructured BQSR to report recalibrated tables. * implemented empirical quality calculation to the BQSR stage (instead of on-the-fly recalibration) * linked quality score quantization to the BQSR stage, outputting a quantization histogram * included the arguments used in BQSR to the GATK Report * included all three tables (RG, QUAL and COVARIATES) to the GATK Report with empirical qualities On-the-fly recalibration with GATK Report * loads all tables from the GATKReport using existing infrastructure (with minor updates) * implemented initialiazation of the covariates using BQSR's argument list * reduced memory usage significantly by loading only the empirical quality and estimated quality reported for each bit set key * applied quality quantization to the base recalibration * excluded low quality bases from on-the-fly recalibration for mismatches, insertions or deletions --- .../sting/gatk/report/GATKReportTable.java | 3 +- .../gatk/walkers/bqsr/BQSRKeyManager.java | 48 +- .../gatk/walkers/bqsr/CycleCovariate.java | 2 +- .../walkers/bqsr/QualityScoreCovariate.java | 4 +- .../gatk/walkers/bqsr/RecalDataManager.java | 258 +++- .../sting/gatk/walkers/bqsr/RecalDatum.java | 4 + .../bqsr/RecalibrationArgumentCollection.java | 15 +- .../walkers/recalibration/EmpiricalQual.java | 55 + .../recalibration/BaseRecalibration.java | 531 ++++--- .../sting/utils/sam/GATKSAMRecord.java | 4 +- .../BaseRecalibrationUnitTest.java | 14 +- public/testdata/exampleCSV.csv | 1362 ----------------- 12 files changed, 571 insertions(+), 1729 deletions(-) create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/EmpiricalQual.java delete mode 100644 public/testdata/exampleCSV.csv diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index e45ffc486..62c36ca6c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -431,8 +431,7 @@ public class GATKReportTable { } catch (Exception e) { } } - if (column.getDataType().equals(GATKReportDataType.Byte) && - ((String) value).length() == 1) { + if (column.getDataType().equals(GATKReportDataType.Byte) && ((String) value).length() == 1) { newValue = ((String) value).charAt(0); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java index a30472ce8..8a9c626eb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java @@ -2,10 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.BitSetUtils; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.LinkedList; -import java.util.List; +import java.util.*; /** * This class provides all the functionality for the BitSet representation of the keys to the hash table of BQSR @@ -30,6 +27,7 @@ import java.util.List; public class BQSRKeyManager { private List requiredCovariates; private List optionalCovariates; + private Map covariateNameToIDMap; private int nRequiredBits; // Number of bits used to represent the required covariates private int nOptionalBits; // Number of bits used to represent the standard covaraites @@ -48,6 +46,7 @@ public class BQSRKeyManager { public BQSRKeyManager(List requiredCovariates, List optionalCovariates) { this.requiredCovariates = new ArrayList(requiredCovariates.size()); // initialize the required covariates list this.optionalCovariates = new ArrayList(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay) + this.covariateNameToIDMap = new HashMap(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates) nRequiredBits = 0; for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management @@ -57,14 +56,16 @@ public class BQSRKeyManager { nRequiredBits += nBits; } - short i = 0; + short id = 0; nOptionalBits = 0; for (Covariate optional : optionalCovariates) { int nBits = optional.numberOfBits(); // number of bits used by this covariate nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate - BitSet optionalID = BitSetUtils.bitSetFrom(i); // calculate the optional covariate ID for this covariate + BitSet optionalID = BitSetUtils.bitSetFrom(id); // calculate the optional covariate ID for this covariate this.optionalCovariates.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object - i++; + String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport + this.covariateNameToIDMap.put(covariateName, id); + id++; } nOptionalIDBits = BitSetUtils.numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID @@ -92,7 +93,7 @@ public class BQSRKeyManager { * @return one key in bitset representation per covariate */ public List bitSetsFromAllKeys(BitSet[] allKeys, EventType eventType) { - List allBitSets = new LinkedList(); // Generate one key per optional covariate + List allBitSets = new LinkedList(); // Generate one key per optional covariate BitSet eventBitSet = BitSetUtils.bitSetFrom(eventType.index); // create a bitset with the event type int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // Location in the bit set to add the event type bits @@ -147,7 +148,7 @@ public class BQSRKeyManager { if (optionalCovariates.size() > 0) { int optionalCovariate = requiredCovariates.size(); // the optional covariate index in the key array int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's - int covariateID = (Short) key[covariateIDIndex]; // get the optional covariate id + int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index OptionalCovariateInfo infoOptional = optionalCovariates.get(covariateID); // so we can get the optional covariate information BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface @@ -162,7 +163,17 @@ public class BQSRKeyManager { return bitSetKey; } - + + /** + * Covariate id can be either the covariate name (String) or the actual id (short). This method + * finds it's type and converts accordingly to the short notation. + * + * @param id the string or short representation of the optional covariate id + * @return the short representation of the optional covariate id. + */ + private short parseCovariateID(Object id) { + return (id instanceof String) ? covariateNameToIDMap.get(id.toString()) : (Short) id; + } /** * Generates a key set of objects from a combined bitset key. @@ -185,13 +196,27 @@ public class BQSRKeyManager { short id = BitSetUtils.shortFrom(idbs); // covert the id bitset into a short Covariate covariate = optionalCovariates.get(id).covariate; // get the corresponding optional covariate object objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set - objectKeys.add(id); // add the covariate id + objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id } objectKeys.add(eventFromBitSet(key)); // add the event type object to the key set return objectKeys; } + public List getRequiredCovariates() { + ArrayList list = new ArrayList(requiredCovariates.size()); + for (RequiredCovariateInfo info : requiredCovariates) + list.add(info.covariate); + return list; + } + + public List getOptionalCovariates() { + ArrayList list = new ArrayList(optionalCovariates.size()); + for (OptionalCovariateInfo info : optionalCovariates) + list.add(info.covariate); + return list; + } + /** * Translates a masked bitset into a bitset starting at 0 * @@ -253,7 +278,6 @@ public class BQSRKeyManager { return chopNBitsFrom(bitSet, leadingBits); } - /** * Aggregate information for each Covariate */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java index 3f3bc5040..7bc6cd754 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java @@ -188,7 +188,7 @@ public class CycleCovariate implements StandardCovariate { @Override public BitSet bitSetFromKey(Object key) { - return BitSetUtils.bitSetFrom((Short) key); + return (key instanceof String) ? BitSetUtils.bitSetFrom(Short.parseShort((String) key)) : BitSetUtils.bitSetFrom((Short) key); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java index cd2253e1a..4100eb8bb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java @@ -79,8 +79,8 @@ public class QualityScoreCovariate implements RequiredCovariate { } @Override - public BitSet bitSetFromKey(Object key) { - return BitSetUtils.bitSetFrom((Byte) key); + public BitSet bitSetFromKey(Object key) { + return (key instanceof String) ? BitSetUtils.bitSetFrom(Byte.parseByte((String) key)) : BitSetUtils.bitSetFrom((Byte) key); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java index 5d1adaf40..742be4bbd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java @@ -26,10 +26,15 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import net.sf.samtools.SAMUtils; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.walkers.recalibration.EmpiricalQual; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.collections.NestedHashMap; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.AlignmentUtils; @@ -37,10 +42,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** * Created by IntelliJ IDEA. @@ -53,18 +55,29 @@ import java.util.Map; */ public class RecalDataManager { - public final NestedHashMap nestedHashMap; // The full dataset - private final HashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed - private final HashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed - private final HashMap> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed + public final static String ARGUMENT_REPORT_TABLE_TITLE = "Arguments"; + public final static String QUANTIZED_REPORT_TABLE_TITLE = "Quantized"; + public final static String READGROUP_REPORT_TABLE_TITLE = "RecalTable0"; + public final static String QUALITY_SCORE_REPORT_TABLE_TITLE = "RecalTable1"; + public final static String ALL_COVARIATES_REPORT_TABLE_TITLE = "RecalTable2"; + + public final static String ARGUMENT_VALUE_COLUMN_NAME = "Value"; + public final static String QUANTIZED_VALUE_COLUMN_NAME = "QuantizedScore"; + public final static String READGROUP_COLUMN_NAME = "ReadGroup"; + public final static String EVENT_TYPE_COLUMN_NAME = "EventType"; + public final static String EMPIRICAL_QUALITY_COLUMN_NAME = "EmpiricalQuality"; + public final static String ESTIMATED_Q_REPORTED_COLUMN_NAME = "EstimatedQReported"; + public final static String QUALITY_SCORE_COLUMN_NAME = "QualityScore"; + public final static String COVARIATE_VALUE_SCORE_COLUMN_NAME = "CovariateValue"; + public final static String COVARIATE_NAME_COLUMN_NAME = "CovariateName"; - public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color private static boolean warnUserNullPlatform = false; - private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ + + public enum SOLID_RECAL_MODE { /** @@ -82,7 +95,20 @@ public class RecalDataManager { /** * Look at the color quality scores and probabilistically decide to change the reference inserted base to be the base which is implied by the original color space instead of the reference. */ - REMOVE_REF_BIAS + REMOVE_REF_BIAS; + + public static SOLID_RECAL_MODE recalModeFromString(String recalMode) { + if (recalMode.equals("DO_NOTHING")) + return SOLID_RECAL_MODE.DO_NOTHING; + if (recalMode.equals("SET_Q_ZERO")) + return SOLID_RECAL_MODE.SET_Q_ZERO; + if (recalMode.equals("SET_Q_ZERO_BASE_N")) + return SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N; + if (recalMode.equals("REMOVE_REF_BIAS")) + return SOLID_RECAL_MODE.REMOVE_REF_BIAS; + + throw new UserException.BadArgumentValue(recalMode, "is not a valid SOLID_RECAL_MODE value"); + } } public enum SOLID_NOCALL_STRATEGY { @@ -97,78 +123,125 @@ public class RecalDataManager { /** * Mark these reads as failing vendor quality checks so they can be filtered out by downstream analyses. */ - PURGE_READ - } + PURGE_READ; - public RecalDataManager() { - nestedHashMap = new NestedHashMap(); - dataCollapsedReadGroup = null; - dataCollapsedQualityScore = null; - dataCollapsedByCovariate = null; - } + public static SOLID_NOCALL_STRATEGY nocallStrategyFromString(String nocallStrategy) { + if (nocallStrategy.equals("THROW_EXCEPTION")) + return SOLID_NOCALL_STRATEGY.THROW_EXCEPTION; + if (nocallStrategy.equals("LEAVE_READ_UNRECALIBRATED")) + return SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED; + if (nocallStrategy.equals("PURGE_READ")) + return SOLID_NOCALL_STRATEGY.PURGE_READ; - public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) { - if (createCollapsedTables) { // Initialize all the collapsed tables, only used by on-the-fly recalibration - nestedHashMap = null; - dataCollapsedReadGroup = new HashMap(); - dataCollapsedQualityScore = new HashMap(); - dataCollapsedByCovariate = new HashMap>(); - for (final EventType errorModel : EventType.values()) { - dataCollapsedReadGroup.put(errorModel, new NestedHashMap()); - dataCollapsedQualityScore.put(errorModel, new NestedHashMap()); - dataCollapsedByCovariate.put(errorModel, new ArrayList()); - for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate - dataCollapsedByCovariate.get(errorModel).add(new NestedHashMap()); - } - } - } - else { - nestedHashMap = new NestedHashMap(); - dataCollapsedReadGroup = null; - dataCollapsedQualityScore = null; - dataCollapsedByCovariate = null; + throw new UserException.BadArgumentValue(nocallStrategy, "is not a valid SOLID_NOCALL_STRATEGY value"); } } - public static ReadCovariates covariateKeySetFrom(GATKSAMRecord read) { - return (ReadCovariates) read.getTemporaryAttribute(COVARS_ATTRIBUTE); - } + public static void listAvailableCovariates(Logger logger) { + // Get a list of all available covariates + final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); - - private void checkForSingletons(final Map data) { - // todo -- this looks like it's better just as a data.valueSet() call? - for (Object comp : data.keySet()) { - final Object val = data.get(comp); - if (val instanceof RecalDatum) { // We are at the end of the nested hash maps - if (data.keySet().size() == 1) { - data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done ... - // in a previous step of the sequential calculation model - } - } - else { // Another layer in the nested hash map - checkForSingletons((Map) val); - } - } + // Print and exit if that's what was requested + logger.info("Available covariates:"); + for (Class covClass : covariateClasses) + logger.info(covClass.getSimpleName()); + logger.info(""); } /** - * Get the appropriate collapsed table out of the set of all the tables held by this Object - * - * @param covariate Which covariate indexes the desired collapsed HashMap - * @return The desired collapsed HashMap + * Generates two lists : required covariates and optional covariates based on the user's requests. + * + * Performs the following tasks in order: + * 1. Adds all requierd covariates in order + * 2. Check if the user asked to use the standard covariates and adds them all if that's the case + * 3. Adds all covariates requested by the user that were not already added by the two previous steps + * + * @param argumentCollection the argument collection object for the recalibration walker + * @return a pair of ordered lists : required covariates (first) and optional covariates (second) */ - public final NestedHashMap getCollapsedTable(final int covariate, final EventType errorModel) { - if (covariate == 0) { - return dataCollapsedReadGroup.get(errorModel); // Table where everything except read group has been collapsed - } - else if (covariate == 1) { - return dataCollapsedQualityScore.get(errorModel); // Table where everything except read group and quality score has been collapsed - } - else { - return dataCollapsedByCovariate.get(errorModel).get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed + public static Pair, ArrayList> initializeCovariates(RecalibrationArgumentCollection argumentCollection) { + final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); + final List> requiredClasses = new PluginManager(RequiredCovariate.class).getPlugins(); + final List> standardClasses = new PluginManager(StandardCovariate.class).getPlugins(); + + ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates + ArrayList optionalCovariates = new ArrayList(); + if (argumentCollection.USE_STANDARD_COVARIATES) + optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user + + if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified + for (String requestedCovariateString : argumentCollection.COVARIATES) { + boolean foundClass = false; + for (Class covClass : covariateClasses) { + if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class + foundClass = true; + if (!requiredClasses.contains(covClass) && + (!argumentCollection.USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { + try { + final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it + optionalCovariates.add(covariate); + } catch (Exception e) { + throw new DynamicClassResolutionException(covClass, e); + } + } + } + } + + if (!foundClass) { + throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates."); + } + } } + return new Pair, ArrayList>(requiredCovariates, optionalCovariates); } + /** + * Initializes the recalibration table -> key manager map + * + * @param requiredCovariates list of required covariates (in order) + * @param optionalCovariates list of optional covariates (in order) + * @return a map with each key manager and it's corresponding recalibration table properly initialized + */ + public static LinkedHashMap> initializeTables(ArrayList requiredCovariates, ArrayList optionalCovariates) { + final LinkedHashMap> tablesAndKeysMap = new LinkedHashMap>(); + ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. + ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables + for (Covariate covariate : requiredCovariates) { + requiredCovariatesToAdd.add(covariate); + final Map recalTable = new HashMap(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively) + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager + tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map + } + final Map recalTable = new HashMap(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager + tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map + return tablesAndKeysMap; + } + + /** + * Initializes the table -> key manager map (unfortunate copy of the above code with minor modifications to accomodate the different return types (RecalDatum vs EmpiricalQual objects) + * + * @param requiredCovariates list of required covariates (in order) + * @param optionalCovariates list of optional covariates (in order) + * @return a map with each key manager and it's corresponding recalibration table properly initialized + */ + public static LinkedHashMap> initializeEmpiricalTables(ArrayList requiredCovariates, ArrayList optionalCovariates) { + final LinkedHashMap> tablesAndKeysMap = new LinkedHashMap>(); + ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. + ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables + for (Covariate covariate : requiredCovariates) { + requiredCovariatesToAdd.add(covariate); + final Map recalTable = new HashMap(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively) + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager + tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map + } + final Map recalTable = new HashMap(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager + tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map + return tablesAndKeysMap; + } + + /** * Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string * @@ -526,8 +599,9 @@ public class RecalDataManager { * * @param read The read for which to compute covariate values. * @param requestedCovariates The list of requested covariates. + * @return a matrix with all the covariates calculated for every base in the read */ - public static void computeCovariates(final GATKSAMRecord read, final List requestedCovariates) { + public static ReadCovariates computeCovariates(final GATKSAMRecord read, final List requestedCovariates) { final int numRequestedCovariates = requestedCovariates.size(); final int readLength = read.getReadLength(); final ReadCovariates readCovariates = new ReadCovariates(readLength, numRequestedCovariates); @@ -536,7 +610,7 @@ public class RecalDataManager { for (Covariate covariate : requestedCovariates) readCovariates.addCovariate(covariate.getValues(read)); - read.setTemporaryAttribute(COVARS_ATTRIBUTE, readCovariates); + return readCovariates; } /** @@ -613,4 +687,42 @@ public class RecalDataManager { return base; } } + + + /** + * Adds the required covariates to a covariate list + * + * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand. + * + * @param classes list of classes to add to the covariate list + * @return the covariate list + */ + private static ArrayList addRequiredCovariatesToList(List> classes) { + ArrayList dest = new ArrayList(classes.size()); + if (classes.size() != 2) + throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected"); + + dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next. + dest.add(new QualityScoreCovariate()); + return dest; + } + + /** + * Adds the standard covariates to a covariate list + * + * @param classes list of classes to add to the covariate list + * @return the covariate list + */ + private static ArrayList addStandardCovariatesToList(List> classes) { + ArrayList dest = new ArrayList(classes.size()); + for (Class covClass : classes) { + try { + final Covariate covariate = (Covariate) covClass.newInstance(); + dest.add(covariate); + } catch (Exception e) { + throw new DynamicClassResolutionException(covClass, e); + } + } + return dest; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java index 91f865180..b7f88c524 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java @@ -87,6 +87,10 @@ public class RecalDatum extends RecalDatumOptimized { public final void calcCombinedEmpiricalQuality(final int smoothing, final int maxQual) { this.empiricalQuality = empiricalQualDouble(smoothing, maxQual); // cache the value so we don't call log over and over again } + + public final void calcEstimatedReportedQuality() { + this.estimatedQReported = -10 * Math.log10(calcExpectedErrors() / (double) numObservations); + } //--------------------------------------------------------------------------------------------------------------- // diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 40f28f644..a33ba8bd0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -50,7 +50,7 @@ public class RecalibrationArgumentCollection { * Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument. */ @Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false) - protected List> knownSites = Collections.emptyList(); + public List> knownSites = Collections.emptyList(); /** * After the header, data records occur one per line until the end of the file. The first several items on a line are the @@ -60,25 +60,25 @@ public class RecalibrationArgumentCollection { */ @Gather(BQSRGatherer.class) @Output - protected PrintStream RECAL_FILE; + public PrintStream RECAL_FILE; /** * List all implemented covariates. */ @Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false) - protected boolean LIST_ONLY = false; + public boolean LIST_ONLY = false; /** * Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you. See the list of covariates with -list. */ @Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false) - protected String[] COVARIATES = null; + public String[] COVARIATES = null; /* * Use the standard set of covariates in addition to the ones listed using the -cov argument */ @Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false) - protected boolean USE_STANDARD_COVARIATES = true; + public boolean USE_STANDARD_COVARIATES = true; ///////////////////////////// // Debugging-only Arguments @@ -88,7 +88,7 @@ public class RecalibrationArgumentCollection { */ @Hidden @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.") - protected boolean RUN_WITHOUT_DBSNP = false; + public boolean RUN_WITHOUT_DBSNP = false; /** * CountCovariates and TableRecalibration accept a --solid_recal_mode flag which governs how the recalibrator handles the @@ -152,6 +152,9 @@ public class RecalibrationArgumentCollection { @Hidden @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.") public String FORCE_PLATFORM = null; + @Hidden + @Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output") + public int QUANTIZING_LEVELS = 16; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/EmpiricalQual.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/EmpiricalQual.java new file mode 100755 index 000000000..e9bfa3513 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/EmpiricalQual.java @@ -0,0 +1,55 @@ +package org.broadinstitute.sting.gatk.walkers.recalibration; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Created by IntelliJ IDEA. + * User: carneiro + * Date: Mar 22, 2012 + * + * Object that holds the empirical quality and estimated reported quality values for on-the-fly recalibration. This is a simplification of the RecalDatum object + */ + +public class EmpiricalQual { + + private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations + private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example) + + private EmpiricalQual() {} + + public EmpiricalQual(final double estimatedQReported, final double empiricalQuality) { + this.estimatedQReported = estimatedQReported; + this.empiricalQuality = empiricalQuality; + } + + public final double getEstimatedQReported() { + return estimatedQReported; + } + + public final double getEmpiricalQuality() { + return empiricalQuality; + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 36674adee..a000732c4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -25,248 +25,267 @@ package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.bqsr.*; +import org.broadinstitute.sting.gatk.walkers.recalibration.EmpiricalQual; import org.broadinstitute.sting.utils.BitSetUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.text.XReadLines; import java.io.File; -import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashMap; -import java.util.List; -import java.util.regex.Pattern; +import java.util.*; /** * Utility methods to facilitate on-the-fly base quality score recalibration. * - * User: rpoplin + * User: carneiro and rpoplin * Date: 2/4/12 */ public class BaseRecalibration { + private List qualQuantizationMap; // histogram containing the map for qual quantization (calculated after recalibration is done) + private LinkedHashMap> keysAndTablesMap; // quick access reference to the read group table and its key manager - private ArrayList> collapsedHashes = new ArrayList> (); // All the collapsed data tables + private ArrayList requestedCovariates = new ArrayList(); // list of all covariates to be used in this calculation - private final ArrayList requestedCovariates = new ArrayList(); // List of all covariates to be used in this calculation - private final ArrayList requiredCovariates = new ArrayList(); // List of required covariates to be used in this calculation - private final ArrayList optionalCovariates = new ArrayList(); // List of optional covariates to be used in this calculation - - public static final Pattern REQUIRED_COVARIATE_PATTERN = Pattern.compile("^# Required Covariates.*"); - public static final Pattern OPTIONAL_COVARIATE_PATTERN = Pattern.compile("^# Optional Covariates.*"); - public static final String EOF_MARKER = "EOF"; + private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code"; + private static String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here"; - private static final byte SMOOTHING_CONSTANT = 1; - - ArrayList keyManagers = new ArrayList(); + /** + * Should ALWAYS use the constructor with the GATK Report file + */ + private BaseRecalibration() {} + /** + * Constructor using a GATK Report file + * + * @param RECAL_FILE a GATK Report file containing the recalibration information + */ public BaseRecalibration(final File RECAL_FILE) { - // Get a list of all available covariates - final List> classes = new PluginManager(Covariate.class).getPlugins(); - RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // todo -- initialize with the parameters from the csv file! + GATKReport report = new GATKReport(RECAL_FILE); - int lineNumber = 0; + GATKReportTable argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE); + RecalibrationArgumentCollection RAC = initializeArgumentCollectionTable(argumentTable); - boolean foundRequiredCovariates = false; - boolean foundOptionalCovariates = false; - boolean initializedKeyManagers = false; + GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE); + qualQuantizationMap = initializeQuantizationTable(quantizedTable); - // Read in the data from the csv file and populate the data map and covariates list - boolean sawEOF = false; - try { - for (String line : new XReadLines(RECAL_FILE)) { - lineNumber++; + Pair, ArrayList> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates + ArrayList requiredCovariates = covariates.getFirst(); + ArrayList optionalCovariates = covariates.getSecond(); + requestedCovariates.addAll(requiredCovariates); // add all required covariates to the list of requested covariates + requestedCovariates.addAll(optionalCovariates); // add all optional covariates to the list of requested covariates - sawEOF = EOF_MARKER.equals(line); - if (sawEOF) - break; - - boolean requiredCovariatesLine = REQUIRED_COVARIATE_PATTERN.matcher(line).matches(); - boolean optionalCovariatesLine = OPTIONAL_COVARIATE_PATTERN.matcher(line).matches(); - - if (requiredCovariatesLine && foundRequiredCovariates) - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Duplicate required covariates line"); - - if (optionalCovariatesLine && foundOptionalCovariates) - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Duplicate optional covariates line"); - - if (optionalCovariatesLine && !foundRequiredCovariates) - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Optional covariates reported before Required covariates"); - - if (requiredCovariatesLine || optionalCovariatesLine) { - String [] covariateNames = line.split(": ")[1].split(","); // take the second half of the string (past the ":") and split it by "," to get the list of required covariates - - List covariateList = requiredCovariatesLine ? requiredCovariates : optionalCovariates; // set the appropriate covariate list to update - - for (String covariateName : covariateNames) { - boolean foundClass = false; - for (Class covClass : classes) { - if ((covariateName + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) { - foundClass = true; - try { - Covariate covariate = (Covariate) covClass.newInstance(); - covariate.initialize(RAC); - requestedCovariates.add(covariate); - covariateList.add(covariate); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - } - } - if (!foundClass) - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (covariateName + "Covariate") + ") isn't a valid covariate option."); - } - foundRequiredCovariates = foundRequiredCovariates || requiredCovariatesLine; - foundOptionalCovariates = foundOptionalCovariates || optionalCovariatesLine; - } - - else if (!line.startsWith("#")) { // if this is not a comment line that we don't care about, it is DATA! - if (!foundRequiredCovariates || !foundOptionalCovariates) // At this point all the covariates should have been found and initialized - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE); - - if (!initializedKeyManagers) { - ArrayList emptyList = new ArrayList(0); - ArrayList requiredCovariatesUpToThis = new ArrayList(); // Initialize one key manager for each table of required covariate - for (Covariate covariate : requiredCovariates) { // Every required covariate table includes all preceding required covariates (e.g. RG ; RG,Q ) - requiredCovariatesUpToThis.add(covariate); - keyManagers.add(new BQSRKeyManager(requiredCovariatesUpToThis, emptyList)); - } - keyManagers.add(new BQSRKeyManager(requiredCovariates, optionalCovariates)); // One master key manager for the collapsed tables - - initializedKeyManagers = true; - } - addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap - } - } - - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e); - } catch (NumberFormatException e) { - throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker."); - } - - if (!sawEOF) { - final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool."; - throw new UserException.MalformedFile(RECAL_FILE, errorMessage); - } - - generateEmpiricalQualities(SMOOTHING_CONSTANT); - } - - - /** - * For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches) - * - * @param file The CSV file we read the line from (for exception throwing purposes) - * @param line A line of CSV data read from the recalibration table data file - */ - private void addCSVData(final File file, final String line) { - final String[] vals = line.split(","); - boolean hasOptionalCovariates = optionalCovariates.size() > 0; // Do we have optional covariates in this key? - int addOptionalCovariates = hasOptionalCovariates ? 2 : 0; // If we have optional covariates at all, add two to the size of the array (to acommodate the covariate and the id) - final Object[] key = new Object[requiredCovariates.size() + addOptionalCovariates + 1]; // Reserve enough space for the required covariates, optional covariate, id and eventType - - int indexCovariateValue = key.length - 3; // In the order of keys, the optional covariate comes right after the required covariates - int indexCovariateID = key.length - 2; // followed by the covariate ID - int indexEventType = key.length - 1; // and the event type - - addKeysToArray(key, vals, requiredCovariates, 0); // Add the required covariates keys - - if (hasOptionalCovariates) { - key[indexCovariateID] = Short.parseShort(vals[indexCovariateID]); // Add the optional covariate ID - Covariate covariate = optionalCovariates.get((Short) key[indexCovariateID]); // Get the covariate object for this ID - key[indexCovariateValue] = covariate.getValue(vals[indexCovariateValue]); // Add the optional covariate value, given the ID - } - key[indexEventType] = EventType.eventFrom(vals[indexEventType]); // Add the event type - - int datumIndex = key.length; // The recal datum starts at the end of the key (after the event type) - long count = Long.parseLong(vals[datumIndex]); // Number of observations - long errors = Long.parseLong(vals[datumIndex + 1]); // Number of errors observed - double reportedQual = Double.parseDouble(vals[1]); // The reported Q score --> todo -- I don't like having the Q score hard coded in vals[1]. Generalize it! - final RecalDatum datum = new RecalDatum(count, errors, reportedQual, 0.0); // Create a new datum using the number of observations, number of mismatches, and reported quality score - - addToAllTables(key, datum); // Add that datum to all the collapsed tables which will be used in the sequential calculation - } - - /** - * Add the given mapping to all of the collapsed hash tables - * - * @param key The list of comparables that is the key for this mapping - * @param fullDatum The RecalDatum which is the data for this mapping - */ - private void addToAllTables(final Object[] key, final RecalDatum fullDatum) { - int nHashes = requiredCovariates.size(); // We will always need one hash per required covariate - if (optionalCovariates.size() > 0) // If we do have optional covariates - nHashes += 1; // we will need one extra hash table with the optional covariate encoded in the key set on top of the required covariates + for (Covariate cov : requestedCovariates) + cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection - - for (int hashIndex = 0; hashIndex < nHashes; hashIndex++) { - HashMap table; // object to hold the hash table we are going to manipulate - if (hashIndex >= collapsedHashes.size()) { // if we haven't yet created the collapsed hash table for this index, create it now! - table = new HashMap(); - collapsedHashes.add(table); // Because this is the only place where we add tables to the ArrayList, they will always be in the order we want. - } - else - table = collapsedHashes.get(hashIndex); // if the table has been previously created, just assign it to the "table" object for manipulation - - int copyTo = hashIndex + 1; // this will copy the covariates up to the index of the one we are including now (1 for RG, 2 for QS,...) - if (copyTo > requiredCovariates.size()) // only in the case where we have optional covariates we need to increase the size of the array - copyTo = requiredCovariates.size() + 2; // if we have optional covarites, add the optional covariate and it's id to the size of the key - Object[] tableKey = new Object[copyTo + 1]; // create a new array that will hold as many keys as hashIndex (1 for RG hash, 2 for QualityScore hash, 3 for covariate hash plus the event type - System.arraycopy(key, 0, tableKey, 0, copyTo); // copy the keys for the corresponding covariates into the tableKey. - tableKey[tableKey.length-1] = key[key.length - 1]; // add the event type. The event type is always the last key, on both key sets. + keysAndTablesMap = new LinkedHashMap>(); + ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. + ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables + for (Covariate covariate : requiredCovariates) { + requiredCovariatesToAdd.add(covariate); + final Map table; // initializing a new recal table for each required covariate (cumulatively) + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager - BitSet hashKey = keyManagers.get(hashIndex).bitSetFromKey(tableKey); // Add bitset key with fullDatum to the appropriate hash - RecalDatum datum = table.get(hashKey); - if (datum == null) - datum = fullDatum; - else if (hashIndex == 0) // Special case for the ReadGroup covariate - datum.combine(fullDatum); + int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES) + if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table + final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE); + table = parseReadGroupTable(keyManager, reportTable); + } + else if (nRequiredCovariates == 2 && optionalCovariatesToAdd.isEmpty()) { // when we have both required covariates and no optional covariates we're at the QUAL table + final GATKReportTable reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE); + table = parseQualityScoreTable(keyManager, reportTable); + } else - datum.increment(fullDatum); - table.put(hashKey, datum); + throw new ReviewedStingException(UNRECOGNIZED_REPORT_TABLE_EXCEPTION); + + keysAndTablesMap.put(keyManager, table); // adding the pair key+table to the map } + + + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager + final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE); + final Map table = parseAllCovariatesTable(keyManager, reportTable); + keysAndTablesMap.put(keyManager, table); // adding the pair table+key to the map } /** - * Loop over all the collapsed tables and turn the recalDatums found there into an empirical quality score - * that will be used in the sequential calculation in TableRecalibrationWalker + * Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table * - * @param smoothing The smoothing parameter that goes into empirical quality score calculation + * @param keyManager the key manager for this table + * @param reportTable the GATKReport table containing data for this table + * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private void generateEmpiricalQualities(final int smoothing) { - for (final HashMap table : collapsedHashes) - for (final RecalDatum datum : table.values()) - datum.calcCombinedEmpiricalQuality(smoothing, QualityUtils.MAX_QUAL_SCORE); + private Map parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + ArrayList columnNamesOrderedList = new ArrayList(5); + columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.COVARIATE_VALUE_SCORE_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.COVARIATE_NAME_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); + return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList); } + /** + * + * Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table + * @param keyManager the key manager for this table + * @param reportTable the GATKReport table containing data for this table + * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. + */ + private Map parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + ArrayList columnNamesOrderedList = new ArrayList(3); + columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); + return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList); + } + /** + * Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table + * + * @param keyManager the key manager for this table + * @param reportTable the GATKReport table containing data for this table + * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. + */ + private Map parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + ArrayList columnNamesOrderedList = new ArrayList(2); + columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); + return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList); + } + /** + * Shared parsing functionality for all tables. + * + * @param keyManager the key manager for this table + * @param reportTable the GATKReport table containing data for this table + * @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table + * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. + */ + private Map genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList columnNamesOrderedList) { + Map result = new HashMap(reportTable.getNumRows()*2); - public void recalibrateRead(final GATKSAMRecord read) { - //compute all covariate values for this read - RecalDataManager.computeCovariates(read, requestedCovariates); - final ReadCovariates readCovariates = RecalDataManager.covariateKeySetFrom(read); + for (Object primaryKey : reportTable.getPrimaryKeys()) { + int nKeys = columnNamesOrderedList.size(); + Object [] keySet = new Object[nKeys]; + for (int i = 0; i < nKeys; i++) + keySet[i] = reportTable.get(primaryKey, columnNamesOrderedList.get(i)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below) + keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager). + BitSet bitKey = keyManager.bitSetFromKey(keySet); - for (final EventType errorModel : EventType.values()) { + double estimatedQReported = (Double) reportTable.get(primaryKey, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME); + double empiricalQuality = (Double) reportTable.get(primaryKey, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME); + EmpiricalQual empiricalQual = new EmpiricalQual(estimatedQReported, empiricalQuality); + + result.put(bitKey, empiricalQual); + } + return result; + } + + /** + * Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores + * + * @param table the GATKReportTable containing the quantization mappings + * @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE + */ + private List initializeQuantizationTable(GATKReportTable table) { + Byte[] result = new Byte[QualityUtils.MAX_QUAL_SCORE + 1]; + for (Object primaryKey : table.getPrimaryKeys()) { + Object value = table.get(primaryKey, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); + byte originalQual = Byte.parseByte(primaryKey.toString()); + byte quantizedQual = Byte.parseByte(value.toString()); + result[originalQual] = quantizedQual; + } + return Arrays.asList(result); + } + + /** + * Parses the arguments table from the GATK Report and creates a RAC object with the proper initialization values + * + * @param table the GATKReportTable containing the arguments and its corresponding values + * @return a RAC object properly initialized with all the objects in the table + */ + private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) { + RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); + + for (Object primaryKey : table.getPrimaryKeys()) { + Object value = table.get(primaryKey, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + if (value.equals("null")) + value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport + + if (primaryKey.equals("covariate") && value != null) + RAC.COVARIATES = value.toString().split(","); + + else if (primaryKey.equals("standard_covs")) + RAC.USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value); + + else if (primaryKey.equals("solid_recal_mode")) + RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value); + + else if (primaryKey.equals("solid_nocall_strategy")) + RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value); + + else if (primaryKey.equals("mismatches_context_size")) + RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value); + + else if (primaryKey.equals("insertions_context_size")) + RAC.INSERTIONS_CONTEXT_SIZE = Integer.parseInt((String) value); + + else if (primaryKey.equals("deletions_context_size")) + RAC.DELETIONS_CONTEXT_SIZE = Integer.parseInt((String) value); + + else if (primaryKey.equals("mismatches_default_quality")) + RAC.MISMATCHES_DEFAULT_QUALITY = Byte.parseByte((String) value); + + else if (primaryKey.equals("insertions_default_quality")) + RAC.INSERTIONS_DEFAULT_QUALITY = Byte.parseByte((String) value); + + else if (primaryKey.equals("deletions_default_quality")) + RAC.DELETIONS_DEFAULT_QUALITY = Byte.parseByte((String) value); + + else if (primaryKey.equals("low_quality_tail")) + RAC.LOW_QUAL_TAIL = Byte.parseByte((String) value); + + else if (primaryKey.equals("default_platform")) + RAC.DEFAULT_PLATFORM = (String) value; + + else if (primaryKey.equals("force_platform")) + RAC.FORCE_PLATFORM = (String) value; + + else if (primaryKey.equals("quantizing_levels")) + RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value); + } + + return RAC; + } + + /** + * Recalibrates the base qualities of a read + * + * It updates the base qualities of the read with the new recalibrated qualities (for all event types) + * + * @param read the read to recalibrate + */ + public void recalibrateRead(final GATKSAMRecord read) { + final ReadCovariates readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates); // compute all covariates for the read + for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings final byte[] originalQuals = read.getBaseQualities(errorModel); final byte[] recalQuals = originalQuals.clone(); - // For each base in the read - for (int offset = 0; offset < read.getReadLength(); offset++) { - final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); - final byte qualityScore = performSequentialQualityCalculation(keySet, errorModel); + for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read + byte qualityScore = originalQuals[offset]; + + if (qualityScore > QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) + final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model + qualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base + } recalQuals[offset] = qualityScore; } - - preserveQScores(originalQuals, recalQuals); // Overwrite the work done if original quality score is too low read.setBaseQualities(recalQuals, errorModel); } } @@ -286,86 +305,66 @@ public class BaseRecalibration { * * Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... ) * - * todo -- I extremely dislike the way all this math is hardcoded... should rethink the data structures for this method in particular. - * - * @param key The list of Comparables that were calculated from the covariates + * @param key The list of Comparables that were calculated from the covariates * @param errorModel the event type * @return A recalibrated quality score as a byte */ private byte performSequentialQualityCalculation(BitSet[] key, EventType errorModel) { final byte qualFromRead = (byte) BitSetUtils.shortFrom(key[1]); - - final int readGroupKeyIndex = 0; - final int qualKeyIndex = 1; - final int covariatesKeyIndex = 2; - - // The global quality shift (over the read group only) - List bitKeys = keyManagers.get(readGroupKeyIndex).bitSetsFromAllKeys(key, errorModel); - if (bitKeys.size() > 1) - throw new ReviewedStingException("There should only be one key for the RG collapsed table, something went wrong here"); - - final RecalDatum globalRecalDatum = collapsedHashes.get(readGroupKeyIndex).get(bitKeys.get(0)); + double globalDeltaQ = 0.0; - if (globalRecalDatum != null) { - final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality(); - final double aggregrateQReported = globalRecalDatum.getEstimatedQReported(); - globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported; - } - - // The shift in quality between reported and empirical - bitKeys = keyManagers.get(qualKeyIndex).bitSetsFromAllKeys(key, errorModel); - if (bitKeys.size() > 1) - throw new ReviewedStingException("There should only be one key for the Qual collapsed table, something went wrong here"); - - final RecalDatum qReportedRecalDatum = collapsedHashes.get(qualKeyIndex).get(bitKeys.get(0)); double deltaQReported = 0.0; - if (qReportedRecalDatum != null) { - final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality(); - deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; - } - - // The shift in quality due to each covariate by itself in turn - bitKeys = keyManagers.get(covariatesKeyIndex).bitSetsFromAllKeys(key, errorModel); double deltaQCovariates = 0.0; - double deltaQCovariateEmpirical; - for (BitSet k : bitKeys) { - final RecalDatum covariateRecalDatum = collapsedHashes.get(covariatesKeyIndex).get(k); - if (covariateRecalDatum != null) { - deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality(); - deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported)); + + for (Map.Entry> mapEntry : keysAndTablesMap.entrySet()) { + BQSRKeyManager keyManager = mapEntry.getKey(); + Map table = mapEntry.getValue(); + + switch(keyManager.getRequiredCovariates().size()) { + case 1: // this is the ReadGroup table + List bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group + if (bitKeys.size() > 1) + throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION); + + final EmpiricalQual empiricalQualRG = table.get(bitKeys.get(0)); + if (empiricalQualRG != null) { + final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality(); + final double aggregrateQReported = empiricalQualRG.getEstimatedQReported(); + globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported; + } + break; + case 2: + if (keyManager.getOptionalCovariates().isEmpty()) { // this is the QualityScore table + bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to the reported quality score + if (bitKeys.size() > 1) + throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION); + + final EmpiricalQual empiricalQualQS = table.get(bitKeys.get(0)); + if (empiricalQualQS != null) { + final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality(); + deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; + } + } + else { // this is the table with all the covariates + bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to each covariate by itself in turn + for (BitSet k : bitKeys) { + final EmpiricalQual empiricalQualCO = table.get(k); + if (empiricalQualCO != null) { + double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality(); + deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported)); + } + } + } + break; + default: + throw new ReviewedStingException(UNRECOGNIZED_REPORT_TABLE_EXCEPTION); } } - final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; - return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_QUAL_SCORE); + double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula + recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_QUAL_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL + + return qualQuantizationMap.get((int) recalibratedQual); // return the quantized version of the recalibrated quality } - /** - * Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold - * - * @param originalQuals The list of original base quality scores - * @param recalQuals A list of the new recalibrated quality scores - */ - private void preserveQScores(final byte[] originalQuals, final byte[] recalQuals) { - for (int iii = 0; iii < recalQuals.length; iii++) { - if (originalQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE) { //BUGBUG: used to be Q5 now is Q6, probably doesn't matter - recalQuals[iii] = originalQuals[iii]; - } - } - } - - /** - * Shared functionality to add keys - * - * @param array the target array we are creating the keys in - * @param keys the actual keys we're using as a source - * @param covariateList the covariate list to loop through - * @param keyIndex the index in the keys and the arrays objects to run from - */ - private void addKeysToArray(final Object[] array, final String[] keys, List covariateList, int keyIndex) { - for (Covariate covariate : covariateList) { - array[keyIndex] = covariate.getValue(keys[keyIndex]); - keyIndex++; - } - } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 6b43479dc..51c3715f3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -52,8 +52,8 @@ public class GATKSAMRecord extends BAMRecord { public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end // Base Quality Score Recalibrator specific attribute tags - public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; - public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; + public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions + public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions // the SAMRecord data we're caching private String mReadString = null; diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java index 3e50a5fd1..a372ef3f0 100644 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java @@ -1,5 +1,10 @@ package org.broadinstitute.sting.utils.recalibration; +import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.utils.NGSPlatform; +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.annotations.Test; import java.io.File; @@ -12,10 +17,13 @@ import java.io.File; */ public class BaseRecalibrationUnitTest { - @Test(enabled=true) - public void testReadingCSV() { - File csv = new File("public/testdata/exampleCSV.csv"); + @Test(enabled=false) + public void testReadingReport() { + File csv = new File("public/testdata/exampleGATKREPORT.grp"); BaseRecalibration baseRecalibration = new BaseRecalibration(csv); + GATKSAMRecord read = ReadUtils.createRandomRead(1000); + read.setReadGroup(new GATKSAMReadGroupRecord(new SAMReadGroupRecord("exampleBAM.bam.bam"), NGSPlatform.ILLUMINA)); + baseRecalibration.recalibrateRead(read); System.out.println("Success"); } } diff --git a/public/testdata/exampleCSV.csv b/public/testdata/exampleCSV.csv deleted file mode 100644 index 4bd052195..000000000 --- a/public/testdata/exampleCSV.csv +++ /dev/null @@ -1,1362 +0,0 @@ -# Counted Sites 312 -# Counted Bases 380 -# Skipped Sites 0 -# Fraction Skipped 1 / Infinity bp -# Required Covariates (in order): ReadGroup,QualityScore -# Optional Covariates (in order): Context,Cycle -# Recalibration Data (in order): CovariateID,EventType,nObservations,nMismatches,Qempirical -exampleBAM.bam,45,TGAAAGTG,0,D,1,0,40 -exampleBAM.bam,45,TGGTATTA,0,D,1,0,40 -exampleBAM.bam,45,AGCCTCGT,0,D,1,0,40 -exampleBAM.bam,45,CTGTGTCT,0,D,1,0,40 -exampleBAM.bam,45,CTTTGTAT,0,I,1,0,40 -exampleBAM.bam,45,CTTAAGTG,0,D,1,0,40 -exampleBAM.bam,45,CTTTATTA,0,D,1,0,40 -exampleBAM.bam,45,23,1,I,5,0,40 -exampleBAM.bam,45,27,1,D,5,0,40 -exampleBAM.bam,45,ATTCTATT,0,I,1,0,40 -exampleBAM.bam,45,CTAATCTC,0,I,1,0,40 -exampleBAM.bam,34,GC,0,M,2,0,40 -exampleBAM.bam,8,TG,0,M,3,0,40 -exampleBAM.bam,45,TAGAGTTT,0,I,1,0,40 -exampleBAM.bam,9,TA,0,M,1,0,40 -exampleBAM.bam,45,GGTTCGGG,0,I,3,0,40 -exampleBAM.bam,45,AGTTTCAC,0,I,1,0,40 -exampleBAM.bam,45,CATTTCAC,0,I,1,0,40 -exampleBAM.bam,16,7,1,M,1,0,40 -exampleBAM.bam,5,76,1,M,1,0,40 -exampleBAM.bam,45,CATGATAA,0,D,1,0,40 -exampleBAM.bam,45,53,1,I,5,0,40 -exampleBAM.bam,45,57,1,D,5,0,40 -exampleBAM.bam,25,52,1,M,1,0,40 -exampleBAM.bam,45,TGGCAGCC,0,D,1,0,40 -exampleBAM.bam,33,CT,0,M,6,0,40 -exampleBAM.bam,45,AAGTGACA,0,I,1,0,40 -exampleBAM.bam,45,AGTGACAT,0,I,1,0,40 -exampleBAM.bam,45,AGAGTTTC,0,I,1,0,40 -exampleBAM.bam,45,CTCTTTGT,0,D,1,0,40 -exampleBAM.bam,45,GCCTGAAA,0,D,1,0,40 -exampleBAM.bam,12,25,1,M,1,0,40 -exampleBAM.bam,34,75,1,M,1,0,40 -exampleBAM.bam,32,41,1,M,2,0,40 -exampleBAM.bam,21,GG,0,M,2,0,40 -exampleBAM.bam,26,50,1,M,1,0,40 -exampleBAM.bam,45,ACCTGGAG,0,D,1,0,40 -exampleBAM.bam,45,CACAGCAA,0,D,1,0,40 -exampleBAM.bam,20,GA,0,M,1,0,40 -exampleBAM.bam,45,AGGTGGAG,0,D,1,0,40 -exampleBAM.bam,45,GCAAAATC,0,I,1,0,40 -exampleBAM.bam,27,TA,0,M,4,0,40 -exampleBAM.bam,27,18,1,M,1,0,40 -exampleBAM.bam,32,CC,0,M,1,0,40 -exampleBAM.bam,45,AAAATCTA,0,I,1,0,40 -exampleBAM.bam,45,22,1,I,5,0,40 -exampleBAM.bam,45,26,1,D,5,0,40 -exampleBAM.bam,33,76,1,M,1,0,40 -exampleBAM.bam,30,24,1,M,1,0,40 -exampleBAM.bam,45,TTCTATTC,0,D,1,0,40 -exampleBAM.bam,45,GTCAATGT,0,I,1,0,40 -exampleBAM.bam,21,73,1,M,1,0,40 -exampleBAM.bam,17,4,1,M,1,0,40 -exampleBAM.bam,8,17,1,M,1,0,40 -exampleBAM.bam,34,GA,0,M,1,0,40 -exampleBAM.bam,45,ATCGTGAG,0,I,1,0,40 -exampleBAM.bam,45,CCAGATCC,0,I,1,0,40 -exampleBAM.bam,45,GATCGTGA,0,D,1,0,40 -exampleBAM.bam,45,52,1,I,5,0,40 -exampleBAM.bam,45,56,1,D,5,0,40 -exampleBAM.bam,9,TC,0,M,1,0,40 -exampleBAM.bam,23,CT,0,M,2,0,40 -exampleBAM.bam,31,26,1,M,2,0,40 -exampleBAM.bam,45,ATGTGAAC,0,D,1,0,40 -exampleBAM.bam,45,ATTACTCT,0,I,1,0,40 -exampleBAM.bam,45,ACACAGCA,0,D,1,0,40 -exampleBAM.bam,26,TT,0,M,1,0,40 -exampleBAM.bam,45,GGGTTTGG,0,D,2,0,40 -exampleBAM.bam,33,8,1,M,1,0,40 -exampleBAM.bam,21,GT,0,M,2,0,40 -exampleBAM.bam,34,74,1,M,1,0,40 -exampleBAM.bam,45,ATTCTTAA,0,I,1,0,40 -exampleBAM.bam,45,GAGCCTTT,0,D,1,0,40 -exampleBAM.bam,20,GC,0,M,1,0,40 -exampleBAM.bam,45,GGTTAGGG,0,D,2,0,40 -exampleBAM.bam,33,42,1,M,1,0,40 -exampleBAM.bam,45,GTGCAAAG,0,I,1,0,40 -exampleBAM.bam,6,75,1,M,1,0,40 -exampleBAM.bam,27,TC,0,M,1,0,40 -exampleBAM.bam,32,CA,0,M,2,0,40 -exampleBAM.bam,29,60,1,M,1,0,40 -exampleBAM.bam,34,13,1,M,1,0,40 -exampleBAM.bam,34,GT,0,M,2,0,40 -exampleBAM.bam,21,74,1,M,1,0,40 -exampleBAM.bam,45,GTTAATGA,0,I,1,0,40 -exampleBAM.bam,45,TATTATTG,0,D,1,0,40 -exampleBAM.bam,24,52,1,M,1,0,40 -exampleBAM.bam,45,CTTTCAGG,0,I,1,0,40 -exampleBAM.bam,45,GACATGGT,0,D,1,0,40 -exampleBAM.bam,45,ATCATGGT,0,D,1,0,40 -exampleBAM.bam,45,21,1,I,5,0,40 -exampleBAM.bam,45,25,1,D,5,0,40 -exampleBAM.bam,34,47,1,M,1,0,40 -exampleBAM.bam,31,25,1,M,1,0,40 -exampleBAM.bam,19,71,1,M,1,0,40 -exampleBAM.bam,6,GG,0,M,4,1,6 -exampleBAM.bam,9,16,1,M,1,0,40 -exampleBAM.bam,45,TCCAGTTC,0,I,1,0,40 -exampleBAM.bam,45,TTCACATG,0,D,1,0,40 -exampleBAM.bam,45,TAAGTGAC,0,I,1,0,40 -exampleBAM.bam,45,GTGACATG,0,D,1,0,40 -exampleBAM.bam,45,55,1,I,5,0,40 -exampleBAM.bam,45,59,1,D,5,0,40 -exampleBAM.bam,45,CATGATCG,0,I,1,0,40 -exampleBAM.bam,16,AT,0,M,1,0,40 -exampleBAM.bam,32,43,1,M,3,0,40 -exampleBAM.bam,19,33,1,M,1,0,40 -exampleBAM.bam,21,GA,0,M,2,0,40 -exampleBAM.bam,45,GTATTTGC,0,D,1,0,40 -exampleBAM.bam,26,TA,0,M,1,0,40 -exampleBAM.bam,45,TCTTAAGT,0,D,1,0,40 -exampleBAM.bam,33,CC,0,M,1,0,40 -exampleBAM.bam,11,20,1,M,1,0,40 -exampleBAM.bam,28,61,1,M,1,0,40 -exampleBAM.bam,18,1,1,M,1,0,40 -exampleBAM.bam,45,ACCCAGAT,0,I,1,0,40 -exampleBAM.bam,45,AAAGACAC,0,I,1,0,40 -exampleBAM.bam,45,GCCTTTGC,0,D,1,0,40 -exampleBAM.bam,27,16,1,M,1,0,40 -exampleBAM.bam,27,TG,0,M,2,0,40 -exampleBAM.bam,32,CT,0,M,1,0,40 -exampleBAM.bam,21,44,1,M,1,0,40 -exampleBAM.bam,45,TATTACTC,0,I,1,0,40 -exampleBAM.bam,45,TGGGCTGG,0,I,1,0,40 -exampleBAM.bam,16,65,1,M,1,0,40 -exampleBAM.bam,34,GG,0,M,2,0,40 -exampleBAM.bam,25,21,1,M,1,0,40 -exampleBAM.bam,22,9,1,M,1,0,40 -exampleBAM.bam,45,CAGGCCAC,0,D,1,0,40 -exampleBAM.bam,45,20,1,I,5,0,40 -exampleBAM.bam,45,24,1,D,5,0,40 -exampleBAM.bam,30,26,1,M,1,0,40 -exampleBAM.bam,45,TTGTATTT,0,D,1,0,40 -exampleBAM.bam,24,53,1,M,1,0,40 -exampleBAM.bam,23,CC,0,M,1,0,40 -exampleBAM.bam,19,70,1,M,1,1,1 -exampleBAM.bam,25,55,1,M,1,0,40 -exampleBAM.bam,45,AGGCCACC,0,I,1,0,40 -exampleBAM.bam,45,54,1,I,5,0,40 -exampleBAM.bam,45,58,1,D,5,0,40 -exampleBAM.bam,45,ACTTTCAG,0,I,1,0,40 -exampleBAM.bam,45,AAAGTGCA,0,D,1,0,40 -exampleBAM.bam,45,ATTGATAT,0,D,1,0,40 -exampleBAM.bam,45,AATGTGAA,0,I,1,0,40 -exampleBAM.bam,9,TT,0,M,1,0,40 -exampleBAM.bam,19,32,1,M,1,0,40 -exampleBAM.bam,29,28,1,M,1,0,40 -exampleBAM.bam,45,CGGGTTTG,0,I,2,0,40 -exampleBAM.bam,45,TCTTTGTA,0,I,1,0,40 -exampleBAM.bam,33,10,1,M,1,0,40 -exampleBAM.bam,33,CA,0,M,2,0,40 -exampleBAM.bam,45,GTTCGGGT,0,I,3,0,40 -exampleBAM.bam,27,TT,0,M,2,0,40 -exampleBAM.bam,27,17,1,M,1,0,40 -exampleBAM.bam,45,CAGCAAAA,0,I,1,0,40 -exampleBAM.bam,45,GGCAGCCT,0,I,1,0,40 -exampleBAM.bam,20,GT,0,M,1,1,1 -exampleBAM.bam,45,TGGAGCCT,0,I,1,0,40 -exampleBAM.bam,45,TGGTGGCC,0,I,1,0,40 -exampleBAM.bam,28,30,1,M,1,0,40 -exampleBAM.bam,33,40,1,M,1,0,40 -exampleBAM.bam,24,TG,0,M,2,0,40 -exampleBAM.bam,45,TGTGTCTT,0,I,1,0,40 -exampleBAM.bam,45,TCAATAAT,0,I,1,0,40 -exampleBAM.bam,45,TCTCCAGG,0,I,1,0,40 -exampleBAM.bam,45,49,1,I,5,0,40 -exampleBAM.bam,45,61,1,D,5,0,40 -exampleBAM.bam,45,CCTCGTCC,0,D,1,0,40 -exampleBAM.bam,45,GGCACCCA,0,I,1,0,40 -exampleBAM.bam,22,44,1,M,2,0,40 -exampleBAM.bam,45,AGGTTATC,0,I,1,0,40 -exampleBAM.bam,34,41,1,M,1,0,40 -exampleBAM.bam,19,65,1,M,1,0,40 -exampleBAM.bam,23,12,1,M,1,0,40 -exampleBAM.bam,23,GG,0,M,1,0,40 -exampleBAM.bam,45,TTGGGTTC,0,I,1,0,40 -exampleBAM.bam,45,TTCTGTGT,0,D,1,0,40 -exampleBAM.bam,45,TGTTGGTT,0,I,1,0,40 -exampleBAM.bam,24,50,1,M,1,0,40 -exampleBAM.bam,45,GTTTCACA,0,I,1,0,40 -exampleBAM.bam,45,TCGGGTTC,0,I,1,0,40 -exampleBAM.bam,45,TAGGGTTC,0,I,1,0,40 -exampleBAM.bam,33,73,1,M,1,0,40 -exampleBAM.bam,9,52,1,M,1,0,40 -exampleBAM.bam,45,19,1,I,5,0,40 -exampleBAM.bam,45,31,1,D,5,0,40 -exampleBAM.bam,25,TA,0,M,3,0,40 -exampleBAM.bam,34,11,1,M,1,0,40 -exampleBAM.bam,34,CC,0,M,1,0,40 -exampleBAM.bam,28,25,1,M,1,0,40 -exampleBAM.bam,45,TAGATTTT,0,I,1,0,40 -exampleBAM.bam,45,GGTTGGGG,0,I,2,0,40 -exampleBAM.bam,45,GGCTGGGG,0,I,1,0,40 -exampleBAM.bam,45,GATTAGAT,0,I,1,0,40 -exampleBAM.bam,5,GG,0,M,3,1,5 -exampleBAM.bam,32,15,1,M,1,0,40 -exampleBAM.bam,27,22,1,M,1,0,40 -exampleBAM.bam,21,42,1,M,1,0,40 -exampleBAM.bam,19,5,1,M,1,0,40 -exampleBAM.bam,19,AT,0,M,1,0,40 -exampleBAM.bam,45,TTTCAGGC,0,D,1,0,40 -exampleBAM.bam,45,TGCCAGGC,0,D,1,0,40 -exampleBAM.bam,45,GTCTTTAT,0,I,1,0,40 -exampleBAM.bam,45,TGAACTGG,0,I,1,0,40 -exampleBAM.bam,26,20,1,M,1,0,40 -exampleBAM.bam,45,TATTCTTA,0,D,1,0,40 -exampleBAM.bam,45,TGATAACC,0,D,1,0,40 -exampleBAM.bam,45,ATTTTTCT,0,D,1,0,40 -exampleBAM.bam,45,GGCTTTAT,0,I,1,0,40 -exampleBAM.bam,5,46,1,M,1,1,1 -exampleBAM.bam,29,27,1,M,1,0,40 -exampleBAM.bam,45,ATCCATTT,0,D,1,0,40 -exampleBAM.bam,45,48,1,I,5,0,40 -exampleBAM.bam,45,60,1,D,5,0,40 -exampleBAM.bam,45,GATCCAGT,0,I,1,0,40 -exampleBAM.bam,45,AATGAGTC,0,D,1,0,40 -exampleBAM.bam,24,TT,0,M,3,1,5 -exampleBAM.bam,45,TCTTTATA,0,I,1,0,40 -exampleBAM.bam,6,CC,0,M,1,0,40 -exampleBAM.bam,23,GT,0,M,2,0,40 -exampleBAM.bam,34,40,1,M,1,0,40 -exampleBAM.bam,45,18,1,I,5,0,40 -exampleBAM.bam,45,30,1,D,5,0,40 -exampleBAM.bam,45,CAAAATCT,0,I,1,0,40 -exampleBAM.bam,22,15,1,M,1,0,40 -exampleBAM.bam,45,CCAGGTTA,0,I,1,0,40 -exampleBAM.bam,45,TCATGGTG,0,I,1,0,40 -exampleBAM.bam,45,TCTAATCT,0,I,1,0,40 -exampleBAM.bam,45,TTGGGTTA,0,I,1,0,40 -exampleBAM.bam,45,TAGGGTTA,0,I,1,0,40 -exampleBAM.bam,45,GTTGGTTA,0,I,1,0,40 -exampleBAM.bam,33,72,1,M,1,0,40 -exampleBAM.bam,31,60,1,M,1,0,40 -exampleBAM.bam,34,CA,0,M,4,0,40 -exampleBAM.bam,45,CCCAGATC,0,D,1,0,40 -exampleBAM.bam,18,36,1,M,1,0,40 -exampleBAM.bam,16,70,1,M,1,0,40 -exampleBAM.bam,45,TGTATTTG,0,I,1,0,40 -exampleBAM.bam,33,46,1,M,1,0,40 -exampleBAM.bam,45,GGTTGGGT,0,I,1,0,40 -exampleBAM.bam,45,GTTTGGGT,0,I,1,0,40 -exampleBAM.bam,45,TTCTAGAG,0,I,1,0,40 -exampleBAM.bam,19,AG,0,M,1,0,40 -exampleBAM.bam,32,GA,0,M,2,0,40 -exampleBAM.bam,32,14,1,M,2,0,40 -exampleBAM.bam,12,62,1,M,1,0,40 -exampleBAM.bam,33,12,1,M,1,0,40 -exampleBAM.bam,45,GGTGGCCT,0,I,1,0,40 -exampleBAM.bam,4,GC,0,M,1,0,40 -exampleBAM.bam,27,53,1,M,2,0,40 -exampleBAM.bam,23,GA,0,M,1,0,40 -exampleBAM.bam,45,TTATTATT,0,I,1,0,40 -exampleBAM.bam,5,74,1,M,1,0,40 -exampleBAM.bam,45,ATGATAAC,0,I,1,0,40 -exampleBAM.bam,45,51,1,I,5,0,40 -exampleBAM.bam,45,63,1,D,5,0,40 -exampleBAM.bam,45,CACCCAGA,0,I,1,0,40 -exampleBAM.bam,45,CGTGAGTG,0,D,1,0,40 -exampleBAM.bam,45,GCTTTATT,0,I,1,0,40 -exampleBAM.bam,45,ATGGTGGC,0,D,1,0,40 -exampleBAM.bam,34,CT,0,M,2,0,40 -exampleBAM.bam,4,72,1,M,1,0,40 -exampleBAM.bam,45,TCGGGTTT,0,I,2,0,40 -exampleBAM.bam,24,48,1,M,1,0,40 -exampleBAM.bam,45,TCCATGAT,0,I,1,0,40 -exampleBAM.bam,45,CACATGAT,0,I,1,0,40 -exampleBAM.bam,45,17,1,I,5,0,40 -exampleBAM.bam,45,29,1,D,5,0,40 -exampleBAM.bam,45,ATCAATAA,0,D,1,0,40 -exampleBAM.bam,45,ACCATGAT,0,I,1,0,40 -exampleBAM.bam,32,GT,0,M,6,0,40 -exampleBAM.bam,19,7,1,M,1,0,40 -exampleBAM.bam,33,45,1,M,1,0,40 -exampleBAM.bam,28,27,1,M,1,0,40 -exampleBAM.bam,45,TCCATTTC,0,I,1,0,40 -exampleBAM.bam,45,GATAACCT,0,D,1,0,40 -exampleBAM.bam,45,AACTGGGA,0,I,1,0,40 -exampleBAM.bam,4,GG,0,M,1,0,40 -exampleBAM.bam,33,GC,0,M,1,0,40 -exampleBAM.bam,45,TCAGGCCA,0,I,1,0,40 -exampleBAM.bam,45,TTGCACTT,0,I,1,0,40 -exampleBAM.bam,45,TTCACTGA,0,I,1,0,40 -exampleBAM.bam,45,CTCCAGGT,0,D,1,0,40 -exampleBAM.bam,6,CT,0,M,1,0,40 -exampleBAM.bam,23,15,1,M,1,0,40 -exampleBAM.bam,25,51,1,M,1,0,40 -exampleBAM.bam,32,72,1,M,1,0,40 -exampleBAM.bam,34,42,1,M,1,0,40 -exampleBAM.bam,45,GATATAAA,0,I,1,0,40 -exampleBAM.bam,45,CTAGAGTT,0,D,1,0,40 -exampleBAM.bam,45,50,1,I,5,0,40 -exampleBAM.bam,45,62,1,D,5,0,40 -exampleBAM.bam,45,GCCACCAT,0,D,1,0,40 -exampleBAM.bam,45,GGGTTCGG,0,D,3,0,40 -exampleBAM.bam,24,TC,0,M,3,0,40 -exampleBAM.bam,25,TT,0,M,2,0,40 -exampleBAM.bam,45,16,1,I,5,0,40 -exampleBAM.bam,45,28,1,D,5,0,40 -exampleBAM.bam,45,ACATGGTA,0,I,1,0,40 -exampleBAM.bam,16,34,1,M,1,1,1 -exampleBAM.bam,45,AATCTCCA,0,D,1,0,40 -exampleBAM.bam,45,ATTTCACT,0,I,1,0,40 -exampleBAM.bam,22,GT,0,M,2,0,40 -exampleBAM.bam,45,ATATCAAT,0,D,1,0,40 -exampleBAM.bam,45,CAATGTGA,0,D,1,0,40 -exampleBAM.bam,45,GAGTCAAT,0,D,1,0,40 -exampleBAM.bam,24,49,1,M,1,0,40 -exampleBAM.bam,45,GGGGGTTG,0,I,1,0,40 -exampleBAM.bam,45,TAGGGTTG,0,I,1,0,40 -exampleBAM.bam,45,TGCAATCC,0,I,1,0,40 -exampleBAM.bam,45,TGGGGTTG,0,I,1,0,40 -exampleBAM.bam,45,TTAATGAG,0,I,1,0,40 -exampleBAM.bam,30,30,1,M,1,0,40 -exampleBAM.bam,23,75,1,M,1,0,40 -exampleBAM.bam,32,GG,0,M,5,0,40 -exampleBAM.bam,20,9,1,M,1,0,40 -exampleBAM.bam,20,CT,0,M,1,0,40 -exampleBAM.bam,45,ATTAGATT,0,D,1,0,40 -exampleBAM.bam,33,44,1,M,1,0,40 -exampleBAM.bam,45,TTTCTGTG,0,I,1,0,40 -exampleBAM.bam,45,TGGAGATT,0,D,1,0,40 -exampleBAM.bam,45,GTTTGGGC,0,I,1,0,40 -exampleBAM.bam,21,11,1,M,1,0,40 -exampleBAM.bam,29,24,1,M,1,0,40 -exampleBAM.bam,32,46,1,M,1,0,40 -exampleBAM.bam,27,55,1,M,1,0,40 -exampleBAM.bam,45,ATATAAAG,0,I,1,0,40 -exampleBAM.bam,45,GAGTTTCA,0,D,1,0,40 -exampleBAM.bam,45,CACTTTCA,0,D,1,0,40 -exampleBAM.bam,45,CCATTTCA,0,D,1,0,40 -exampleBAM.bam,45,CCAGGCAC,0,D,1,0,40 -exampleBAM.bam,11,TT,0,M,1,1,1 -exampleBAM.bam,45,TTTCACTG,0,I,1,0,40 -exampleBAM.bam,33,GA,0,M,1,0,40 -exampleBAM.bam,45,TCGTGAGT,0,I,1,0,40 -exampleBAM.bam,45,TACTCTTT,0,D,1,0,40 -exampleBAM.bam,45,TAATGAGT,0,I,1,0,40 -exampleBAM.bam,45,GTGTCTTT,0,D,1,0,40 -exampleBAM.bam,45,GGCTTTAT,0,D,1,0,40 -exampleBAM.bam,22,70,1,M,1,0,40 -exampleBAM.bam,45,ATTTTTCT,0,I,1,0,40 -exampleBAM.bam,45,TGCCAGGC,0,I,1,0,40 -exampleBAM.bam,33,1,1,M,2,0,40 -exampleBAM.bam,45,TTTCAGGC,0,I,1,0,40 -exampleBAM.bam,45,TATTCTTA,0,I,1,0,40 -exampleBAM.bam,45,TGATAACC,0,I,1,0,40 -exampleBAM.bam,45,GTCTTTAT,0,D,1,0,40 -exampleBAM.bam,45,TGAACTGG,0,D,1,0,40 -exampleBAM.bam,21,AG,0,M,2,0,40 -exampleBAM.bam,32,33,1,M,2,0,40 -exampleBAM.bam,27,56,1,M,1,0,40 -exampleBAM.bam,45,GGCTGGGG,0,D,1,0,40 -exampleBAM.bam,45,GATTAGAT,0,D,1,0,40 -exampleBAM.bam,33,35,1,M,1,0,40 -exampleBAM.bam,45,TAGATTTT,0,D,1,0,40 -exampleBAM.bam,45,GGTTGGGG,0,D,2,0,40 -exampleBAM.bam,19,CT,0,M,2,1,3 -exampleBAM.bam,45,19,1,D,5,0,40 -exampleBAM.bam,45,31,1,I,5,0,40 -exampleBAM.bam,45,TGTTGGTT,0,D,1,0,40 -exampleBAM.bam,45,TTCTGTGT,0,I,1,0,40 -exampleBAM.bam,24,62,1,M,1,0,40 -exampleBAM.bam,45,TCGGGTTC,0,D,1,0,40 -exampleBAM.bam,45,GTTTCACA,0,D,1,0,40 -exampleBAM.bam,45,TAGGGTTC,0,D,1,0,40 -exampleBAM.bam,45,TTGGGTTC,0,D,1,0,40 -exampleBAM.bam,30,TT,0,M,2,0,40 -exampleBAM.bam,30,17,1,M,2,0,40 -exampleBAM.bam,33,69,1,M,1,0,40 -exampleBAM.bam,6,36,1,M,1,0,40 -exampleBAM.bam,17,GT,0,M,1,0,40 -exampleBAM.bam,21,64,1,M,1,0,40 -exampleBAM.bam,34,AC,0,M,1,0,40 -exampleBAM.bam,16,GC,0,M,1,0,40 -exampleBAM.bam,45,CCTCGTCC,0,I,1,0,40 -exampleBAM.bam,45,49,1,D,5,0,40 -exampleBAM.bam,45,61,1,I,5,0,40 -exampleBAM.bam,45,AGGTTATC,0,D,1,0,40 -exampleBAM.bam,45,GGCACCCA,0,D,1,0,40 -exampleBAM.bam,45,TGTGTCTT,0,D,1,0,40 -exampleBAM.bam,45,TCAATAAT,0,D,1,0,40 -exampleBAM.bam,45,TCTCCAGG,0,D,1,0,40 -exampleBAM.bam,6,AA,0,M,2,0,40 -exampleBAM.bam,31,TC,0,M,1,0,40 -exampleBAM.bam,31,19,1,M,1,0,40 -exampleBAM.bam,8,58,1,M,1,0,40 -exampleBAM.bam,28,54,1,M,1,0,40 -exampleBAM.bam,45,GGTGGCCT,0,D,1,0,40 -exampleBAM.bam,18,10,1,M,1,0,40 -exampleBAM.bam,18,CA,0,M,2,0,40 -exampleBAM.bam,27,57,1,M,1,0,40 -exampleBAM.bam,21,AT,0,M,1,0,40 -exampleBAM.bam,45,TGTATTTG,0,D,1,0,40 -exampleBAM.bam,45,TTCTAGAG,0,D,1,0,40 -exampleBAM.bam,45,GGTTGGGT,0,D,1,0,40 -exampleBAM.bam,45,GTTTGGGT,0,D,1,0,40 -exampleBAM.bam,13,TA,0,M,1,0,40 -exampleBAM.bam,20,AC,0,M,1,0,40 -exampleBAM.bam,45,CCCAGATC,0,I,1,0,40 -exampleBAM.bam,32,2,1,M,2,0,40 -exampleBAM.bam,27,27,1,M,1,0,40 -exampleBAM.bam,6,67,1,M,1,0,40 -exampleBAM.bam,45,TAGGGTTA,0,D,1,0,40 -exampleBAM.bam,45,GTTGGTTA,0,D,1,0,40 -exampleBAM.bam,45,TCATGGTG,0,D,1,0,40 -exampleBAM.bam,45,TCTAATCT,0,D,1,0,40 -exampleBAM.bam,45,TTGGGTTA,0,D,1,0,40 -exampleBAM.bam,30,TG,0,M,1,0,40 -exampleBAM.bam,45,18,1,D,5,0,40 -exampleBAM.bam,45,30,1,I,5,0,40 -exampleBAM.bam,45,CCAGGTTA,0,D,1,0,40 -exampleBAM.bam,45,CAAAATCT,0,D,1,0,40 -exampleBAM.bam,25,31,1,M,1,0,40 -exampleBAM.bam,34,6,1,M,1,0,40 -exampleBAM.bam,34,AA,0,M,1,0,40 -exampleBAM.bam,17,GG,0,M,1,0,40 -exampleBAM.bam,23,35,1,M,1,0,40 -exampleBAM.bam,45,TCTTTATA,0,D,1,0,40 -exampleBAM.bam,45,GATCCAGT,0,D,1,0,40 -exampleBAM.bam,45,48,1,D,5,0,40 -exampleBAM.bam,45,60,1,I,5,0,40 -exampleBAM.bam,45,ATCCATTT,0,I,1,0,40 -exampleBAM.bam,45,AATGAGTC,0,I,1,0,40 -exampleBAM.bam,31,TA,0,M,2,0,40 -exampleBAM.bam,21,AA,0,M,1,0,40 -exampleBAM.bam,34,65,1,M,1,0,40 -exampleBAM.bam,45,CTCCAGGT,0,I,1,0,40 -exampleBAM.bam,18,CT,0,M,1,0,40 -exampleBAM.bam,33,3,1,M,1,0,40 -exampleBAM.bam,45,TCAGGCCA,0,D,1,0,40 -exampleBAM.bam,45,TTGCACTT,0,D,1,0,40 -exampleBAM.bam,28,53,1,M,1,0,40 -exampleBAM.bam,45,TTCACTGA,0,D,1,0,40 -exampleBAM.bam,19,CC,0,M,1,0,40 -exampleBAM.bam,32,1,1,M,1,0,40 -exampleBAM.bam,45,GATAACCT,0,I,1,0,40 -exampleBAM.bam,45,AACTGGGA,0,D,1,0,40 -exampleBAM.bam,16,73,1,M,1,0,40 -exampleBAM.bam,45,TCCATTTC,0,D,1,0,40 -exampleBAM.bam,21,66,1,M,1,0,40 -exampleBAM.bam,34,5,1,M,1,0,40 -exampleBAM.bam,34,AT,0,M,6,0,40 -exampleBAM.bam,16,47,1,M,1,0,40 -exampleBAM.bam,45,CACATGAT,0,D,1,0,40 -exampleBAM.bam,45,17,1,D,5,0,40 -exampleBAM.bam,45,29,1,I,5,0,40 -exampleBAM.bam,45,ATCAATAA,0,I,1,0,40 -exampleBAM.bam,45,ACCATGAT,0,D,1,0,40 -exampleBAM.bam,45,TCGGGTTT,0,D,2,0,40 -exampleBAM.bam,45,TCCATGAT,0,D,1,0,40 -exampleBAM.bam,6,AG,0,M,1,1,1 -exampleBAM.bam,6,4,1,M,1,0,40 -exampleBAM.bam,31,TT,0,M,1,0,40 -exampleBAM.bam,45,ATGATAAC,0,D,1,0,40 -exampleBAM.bam,45,51,1,D,5,0,40 -exampleBAM.bam,45,63,1,I,5,0,40 -exampleBAM.bam,45,CGTGAGTG,0,I,1,0,40 -exampleBAM.bam,45,CACCCAGA,0,D,1,0,40 -exampleBAM.bam,16,GT,0,M,1,0,40 -exampleBAM.bam,5,70,1,M,1,0,40 -exampleBAM.bam,45,GCTTTATT,0,D,1,0,40 -exampleBAM.bam,45,ATGGTGGC,0,I,1,0,40 -exampleBAM.bam,45,TTATTATT,0,D,1,0,40 -exampleBAM.bam,34,64,1,M,1,0,40 -exampleBAM.bam,21,AC,0,M,3,0,40 -exampleBAM.bam,33,2,1,M,1,0,40 -exampleBAM.bam,45,TTTCACTG,0,D,1,0,40 -exampleBAM.bam,45,TCGTGAGT,0,D,1,0,40 -exampleBAM.bam,45,GTGTCTTT,0,I,1,0,40 -exampleBAM.bam,45,TAATGAGT,0,D,1,0,40 -exampleBAM.bam,45,TACTCTTT,0,I,1,0,40 -exampleBAM.bam,45,CACTTTCA,0,I,1,0,40 -exampleBAM.bam,45,CCATTTCA,0,I,1,0,40 -exampleBAM.bam,45,ATATAAAG,0,D,1,0,40 -exampleBAM.bam,45,GAGTTTCA,0,I,1,0,40 -exampleBAM.bam,45,CCAGGCAC,0,I,1,0,40 -exampleBAM.bam,29,54,1,M,1,0,40 -exampleBAM.bam,6,65,1,M,1,0,40 -exampleBAM.bam,19,10,1,M,1,0,40 -exampleBAM.bam,19,CA,0,M,2,0,40 -exampleBAM.bam,45,TTTCTGTG,0,D,1,0,40 -exampleBAM.bam,33,32,1,M,1,0,40 -exampleBAM.bam,45,GTTTGGGC,0,D,1,0,40 -exampleBAM.bam,45,TGGAGATT,0,I,1,0,40 -exampleBAM.bam,45,ATTAGATT,0,I,1,0,40 -exampleBAM.bam,34,4,1,M,1,0,40 -exampleBAM.bam,21,67,1,M,1,0,40 -exampleBAM.bam,45,TGGGGTTG,0,D,1,0,40 -exampleBAM.bam,45,TGCAATCC,0,D,1,0,40 -exampleBAM.bam,45,GGGGGTTG,0,D,1,0,40 -exampleBAM.bam,45,TAGGGTTG,0,D,1,0,40 -exampleBAM.bam,45,TTAATGAG,0,D,1,0,40 -exampleBAM.bam,30,18,1,M,1,0,40 -exampleBAM.bam,30,TA,0,M,4,0,40 -exampleBAM.bam,45,16,1,D,5,0,40 -exampleBAM.bam,45,28,1,I,5,0,40 -exampleBAM.bam,45,ACATGGTA,0,D,1,0,40 -exampleBAM.bam,45,GAGTCAAT,0,I,1,0,40 -exampleBAM.bam,45,CAATGTGA,0,I,1,0,40 -exampleBAM.bam,45,AATCTCCA,0,I,1,0,40 -exampleBAM.bam,45,ATTTCACT,0,D,1,0,40 -exampleBAM.bam,45,ATATCAAT,0,I,1,0,40 -exampleBAM.bam,8,57,1,M,1,1,1 -exampleBAM.bam,34,38,1,M,1,0,40 -exampleBAM.bam,31,16,1,M,1,0,40 -exampleBAM.bam,31,TG,0,M,3,0,40 -exampleBAM.bam,45,GGGTTCGG,0,I,3,0,40 -exampleBAM.bam,45,CTAGAGTT,0,I,1,0,40 -exampleBAM.bam,45,50,1,D,5,0,40 -exampleBAM.bam,45,62,1,I,5,0,40 -exampleBAM.bam,45,GATATAAA,0,D,1,0,40 -exampleBAM.bam,45,GCCACCAT,0,I,1,0,40 -exampleBAM.bam,45,ACCTGGAG,0,I,1,0,40 -exampleBAM.bam,5,AG,0,M,1,0,40 -exampleBAM.bam,45,AGGTGGAG,0,I,1,0,40 -exampleBAM.bam,45,GCAAAATC,0,D,1,0,40 -exampleBAM.bam,45,CACAGCAA,0,I,1,0,40 -exampleBAM.bam,28,TT,0,M,1,0,40 -exampleBAM.bam,33,39,1,M,1,0,40 -exampleBAM.bam,19,GT,0,M,1,0,40 -exampleBAM.bam,23,64,1,M,2,0,40 -exampleBAM.bam,27,30,1,M,1,0,40 -exampleBAM.bam,32,AC,0,M,1,0,40 -exampleBAM.bam,45,AAGTGACA,0,D,1,0,40 -exampleBAM.bam,5,38,1,M,1,0,40 -exampleBAM.bam,45,AGAGTTTC,0,D,1,0,40 -exampleBAM.bam,45,AGTGACAT,0,D,1,0,40 -exampleBAM.bam,45,GCCTGAAA,0,I,1,0,40 -exampleBAM.bam,45,CTCTTTGT,0,I,1,0,40 -exampleBAM.bam,33,AT,0,M,2,0,40 -exampleBAM.bam,45,TGGCAGCC,0,I,1,0,40 -exampleBAM.bam,4,AA,0,M,1,0,40 -exampleBAM.bam,29,TC,0,M,1,0,40 -exampleBAM.bam,34,71,1,M,1,0,40 -exampleBAM.bam,45,AGTTTCAC,0,D,1,0,40 -exampleBAM.bam,45,CATTTCAC,0,D,1,0,40 -exampleBAM.bam,45,53,1,D,5,0,40 -exampleBAM.bam,45,57,1,I,5,0,40 -exampleBAM.bam,45,CATGATAA,0,I,1,0,40 -exampleBAM.bam,45,TAGAGTTT,0,D,1,0,40 -exampleBAM.bam,45,GGTTCGGG,0,D,3,0,40 -exampleBAM.bam,45,CTTTATTA,0,I,1,0,40 -exampleBAM.bam,45,CTTTGTAT,0,D,1,0,40 -exampleBAM.bam,45,AGCCTCGT,0,I,1,0,40 -exampleBAM.bam,45,CTGTGTCT,0,I,1,0,40 -exampleBAM.bam,45,CTTAAGTG,0,I,1,0,40 -exampleBAM.bam,45,ATTCTATT,0,D,1,0,40 -exampleBAM.bam,45,CTAATCTC,0,D,1,0,40 -exampleBAM.bam,45,23,1,D,5,0,40 -exampleBAM.bam,45,27,1,I,5,0,40 -exampleBAM.bam,30,21,1,M,1,0,40 -exampleBAM.bam,45,TGAAAGTG,0,I,1,0,40 -exampleBAM.bam,45,TGGTATTA,0,I,1,0,40 -exampleBAM.bam,23,38,1,M,1,0,40 -exampleBAM.bam,34,3,1,M,1,0,40 -exampleBAM.bam,45,GGTTAGGG,0,I,2,0,40 -exampleBAM.bam,45,GTGCAAAG,0,D,1,0,40 -exampleBAM.bam,28,TG,0,M,3,0,40 -exampleBAM.bam,45,ATTCTTAA,0,D,1,0,40 -exampleBAM.bam,45,GAGCCTTT,0,I,1,0,40 -exampleBAM.bam,27,31,1,M,1,0,40 -exampleBAM.bam,29,48,1,M,1,0,40 -exampleBAM.bam,32,AA,0,M,1,0,40 -exampleBAM.bam,19,GG,0,M,2,0,40 -exampleBAM.bam,4,37,1,M,1,0,40 -exampleBAM.bam,45,GGGTTTGG,0,I,2,0,40 -exampleBAM.bam,33,AG,0,M,3,0,40 -exampleBAM.bam,28,50,1,M,1,0,40 -exampleBAM.bam,45,ATTACTCT,0,D,1,0,40 -exampleBAM.bam,45,ACACAGCA,0,I,1,0,40 -exampleBAM.bam,45,ATGTGAAC,0,I,1,0,40 -exampleBAM.bam,32,36,1,M,2,0,40 -exampleBAM.bam,29,TA,0,M,2,0,40 -exampleBAM.bam,34,70,1,M,1,0,40 -exampleBAM.bam,17,76,1,M,1,1,1 -exampleBAM.bam,30,54,1,M,1,0,40 -exampleBAM.bam,24,25,1,M,1,0,40 -exampleBAM.bam,45,ATCGTGAG,0,D,1,0,40 -exampleBAM.bam,45,GATCGTGA,0,I,1,0,40 -exampleBAM.bam,45,52,1,D,5,0,40 -exampleBAM.bam,45,56,1,I,5,0,40 -exampleBAM.bam,45,CCAGATCC,0,D,1,0,40 -exampleBAM.bam,16,CA,0,M,1,0,40 -exampleBAM.bam,8,63,1,M,1,0,40 -exampleBAM.bam,14,TG,0,M,1,0,40 -exampleBAM.bam,23,AT,0,M,3,0,40 -exampleBAM.bam,19,72,1,M,1,0,40 -exampleBAM.bam,30,20,1,M,1,0,40 -exampleBAM.bam,45,TTCTATTC,0,I,1,0,40 -exampleBAM.bam,45,GTCAATGT,0,D,1,0,40 -exampleBAM.bam,45,AAAATCTA,0,D,1,0,40 -exampleBAM.bam,45,22,1,D,5,0,40 -exampleBAM.bam,45,26,1,I,5,0,40 -exampleBAM.bam,34,2,1,M,1,0,40 -exampleBAM.bam,19,GC,0,M,1,0,40 -exampleBAM.bam,6,68,1,M,1,1,1 -exampleBAM.bam,23,66,1,M,1,0,40 -exampleBAM.bam,27,28,1,M,1,0,40 -exampleBAM.bam,32,AT,0,M,2,0,40 -exampleBAM.bam,5,AA,0,M,1,0,40 -exampleBAM.bam,45,TATTACTC,0,D,1,0,40 -exampleBAM.bam,33,37,1,M,1,0,40 -exampleBAM.bam,45,TGGGCTGG,0,D,1,0,40 -exampleBAM.bam,28,TC,0,M,1,0,40 -exampleBAM.bam,4,AG,0,M,1,0,40 -exampleBAM.bam,29,TT,0,M,2,0,40 -exampleBAM.bam,18,GT,0,M,1,0,40 -exampleBAM.bam,45,AAAGACAC,0,D,1,0,40 -exampleBAM.bam,45,GCCTTTGC,0,I,1,0,40 -exampleBAM.bam,45,ACCCAGAT,0,D,1,0,40 -exampleBAM.bam,45,TCTTAAGT,0,I,1,0,40 -exampleBAM.bam,13,55,1,M,1,0,40 -exampleBAM.bam,45,GTATTTGC,0,I,1,0,40 -exampleBAM.bam,33,7,1,M,1,0,40 -exampleBAM.bam,33,AC,0,M,1,0,40 -exampleBAM.bam,23,AA,0,M,1,0,40 -exampleBAM.bam,8,60,1,M,1,0,40 -exampleBAM.bam,22,38,1,M,1,0,40 -exampleBAM.bam,45,CATGATCG,0,D,1,0,40 -exampleBAM.bam,45,55,1,D,5,0,40 -exampleBAM.bam,45,59,1,I,5,0,40 -exampleBAM.bam,45,TCCAGTTC,0,D,1,0,40 -exampleBAM.bam,45,GTGACATG,0,I,1,0,40 -exampleBAM.bam,45,TTCACATG,0,I,1,0,40 -exampleBAM.bam,45,TAAGTGAC,0,D,1,0,40 -exampleBAM.bam,4,64,1,M,1,1,1 -exampleBAM.bam,25,24,1,M,1,0,40 -exampleBAM.bam,22,AG,0,M,2,0,40 -exampleBAM.bam,45,CTTTCAGG,0,D,1,0,40 -exampleBAM.bam,45,ATCATGGT,0,I,1,0,40 -exampleBAM.bam,45,21,1,D,5,0,40 -exampleBAM.bam,45,25,1,I,5,0,40 -exampleBAM.bam,45,GACATGGT,0,I,1,0,40 -exampleBAM.bam,30,23,1,M,1,0,40 -exampleBAM.bam,33,67,1,M,1,0,40 -exampleBAM.bam,24,56,1,M,1,0,40 -exampleBAM.bam,45,TATTATTG,0,I,1,0,40 -exampleBAM.bam,45,GTTAATGA,0,D,1,0,40 -exampleBAM.bam,32,AG,0,M,1,0,40 -exampleBAM.bam,23,67,1,M,1,0,40 -exampleBAM.bam,45,TGGAGCCT,0,D,1,0,40 -exampleBAM.bam,45,TGGTGGCC,0,D,1,0,40 -exampleBAM.bam,28,TA,0,M,1,0,40 -exampleBAM.bam,45,CAGCAAAA,0,D,1,0,40 -exampleBAM.bam,45,GGCAGCCT,0,D,1,0,40 -exampleBAM.bam,34,68,1,M,1,0,40 -exampleBAM.bam,21,3,1,M,1,0,40 -exampleBAM.bam,45,TCTTTGTA,0,D,1,0,40 -exampleBAM.bam,45,GTTCGGGT,0,D,3,0,40 -exampleBAM.bam,28,48,1,M,1,0,40 -exampleBAM.bam,33,AA,0,M,1,0,40 -exampleBAM.bam,18,GG,0,M,1,0,40 -exampleBAM.bam,45,CGGGTTTG,0,D,2,0,40 -exampleBAM.bam,34,34,1,M,1,0,40 -exampleBAM.bam,23,AC,0,M,1,0,40 -exampleBAM.bam,30,52,1,M,1,0,40 -exampleBAM.bam,24,27,1,M,1,0,40 -exampleBAM.bam,45,AGGCCACC,0,D,1,0,40 -exampleBAM.bam,20,69,1,M,1,0,40 -exampleBAM.bam,45,AAAGTGCA,0,I,1,0,40 -exampleBAM.bam,45,ATTGATAT,0,I,1,0,40 -exampleBAM.bam,45,AATGTGAA,0,D,1,0,40 -exampleBAM.bam,45,54,1,D,5,0,40 -exampleBAM.bam,45,58,1,I,5,0,40 -exampleBAM.bam,45,ACTTTCAG,0,D,1,0,40 -exampleBAM.bam,23,37,1,M,1,0,40 -exampleBAM.bam,21,71,1,M,1,0,40 -exampleBAM.bam,33,66,1,M,1,0,40 -exampleBAM.bam,15,TG,0,M,1,0,40 -exampleBAM.bam,45,TTGTATTT,0,I,1,0,40 -exampleBAM.bam,45,20,1,D,5,0,40 -exampleBAM.bam,45,24,1,I,5,0,40 -exampleBAM.bam,45,CAGGCCAC,0,I,1,0,40 -exampleBAM.bam,23,59,1,M,1,0,40 -exampleBAM.bam,17,20,1,M,1,0,40 -exampleBAM.bam,30,CG,0,M,1,0,40 -exampleBAM.bam,45,TTGATATA,0,I,1,0,40 -exampleBAM.bam,45,TTCTTAAG,0,I,1,0,40 -exampleBAM.bam,15,14,1,M,1,0,40 -exampleBAM.bam,45,GAACTGGG,0,D,1,0,40 -exampleBAM.bam,45,6,1,I,5,0,40 -exampleBAM.bam,45,10,1,D,5,0,40 -exampleBAM.bam,45,GGGCTGGG,0,D,1,0,40 -exampleBAM.bam,31,10,1,M,1,0,40 -exampleBAM.bam,34,60,1,M,1,0,40 -exampleBAM.bam,25,37,1,M,1,0,40 -exampleBAM.bam,6,31,1,M,1,1,1 -exampleBAM.bam,30,42,1,M,1,0,40 -exampleBAM.bam,45,GTTCTAGA,0,D,1,0,40 -exampleBAM.bam,45,TATTTGCA,0,D,1,0,40 -exampleBAM.bam,24,5,1,M,1,0,40 -exampleBAM.bam,45,CCTTTGCA,0,D,1,0,40 -exampleBAM.bam,45,CAGGCACC,0,I,1,0,40 -exampleBAM.bam,45,36,1,I,5,0,40 -exampleBAM.bam,45,40,1,D,5,0,40 -exampleBAM.bam,29,GA,0,M,2,0,40 -exampleBAM.bam,21,29,1,M,1,0,40 -exampleBAM.bam,45,TAATCTCC,0,I,1,0,40 -exampleBAM.bam,15,74,1,M,1,0,40 -exampleBAM.bam,45,TTGGGGGT,0,I,1,0,40 -exampleBAM.bam,33,24,1,M,1,0,40 -exampleBAM.bam,45,GTTGGGGT,0,I,1,0,40 -exampleBAM.bam,45,GCTGGGGT,0,I,1,0,40 -exampleBAM.bam,45,66,1,I,5,0,40 -exampleBAM.bam,45,CTTGGCTT,0,D,1,0,40 -exampleBAM.bam,45,GGCCACCA,0,D,1,0,40 -exampleBAM.bam,19,TG,0,M,2,0,40 -exampleBAM.bam,45,TTCAGGCC,0,I,1,0,40 -exampleBAM.bam,45,GGTTAATG,0,I,1,0,40 -exampleBAM.bam,45,GGTGGAGC,0,I,1,0,40 -exampleBAM.bam,28,GG,0,M,3,0,40 -exampleBAM.bam,45,GAGATTAG,0,I,1,0,40 -exampleBAM.bam,45,7,1,I,5,0,40 -exampleBAM.bam,45,11,1,D,5,0,40 -exampleBAM.bam,45,TTACTCTT,0,I,1,0,40 -exampleBAM.bam,30,9,1,M,1,0,40 -exampleBAM.bam,45,TTTATATC,0,I,1,0,40 -exampleBAM.bam,45,TGGTTAAT,0,I,1,0,40 -exampleBAM.bam,45,GTATTACT,0,D,1,0,40 -exampleBAM.bam,31,11,1,M,1,0,40 -exampleBAM.bam,31,CC,0,M,1,0,40 -exampleBAM.bam,34,61,1,M,1,0,40 -exampleBAM.bam,25,36,1,M,1,0,40 -exampleBAM.bam,45,ACAGCAAA,0,D,1,0,40 -exampleBAM.bam,45,AGTGCAAA,0,D,1,0,40 -exampleBAM.bam,45,37,1,I,5,0,40 -exampleBAM.bam,45,41,1,D,5,0,40 -exampleBAM.bam,45,TCCAGGTT,0,I,1,0,40 -exampleBAM.bam,45,GTGAGTGT,0,D,1,0,40 -exampleBAM.bam,45,TTATCATG,0,D,1,0,40 -exampleBAM.bam,24,AG,0,M,2,0,40 -exampleBAM.bam,29,GC,0,M,1,0,40 -exampleBAM.bam,32,57,1,M,1,0,40 -exampleBAM.bam,45,67,1,I,5,0,40 -exampleBAM.bam,18,19,1,M,1,0,40 -exampleBAM.bam,45,CTGGAGAT,0,I,1,0,40 -exampleBAM.bam,45,AGATTTTT,0,I,1,0,40 -exampleBAM.bam,45,AAATCTAA,0,D,1,0,40 -exampleBAM.bam,45,CTGAAAGT,0,D,1,0,40 -exampleBAM.bam,45,AGGCACCC,0,D,1,0,40 -exampleBAM.bam,45,TCTGTGTC,0,I,1,0,40 -exampleBAM.bam,45,TTGGGCTG,0,D,1,0,40 -exampleBAM.bam,28,47,1,M,1,0,40 -exampleBAM.bam,45,GTTGGGGG,0,I,1,0,40 -exampleBAM.bam,19,TT,0,M,2,0,40 -exampleBAM.bam,29,45,1,M,1,0,40 -exampleBAM.bam,45,CCTGGAGA,0,I,1,0,40 -exampleBAM.bam,45,ATGATTCT,0,D,1,0,40 -exampleBAM.bam,45,GCCAGGCA,0,I,1,0,40 -exampleBAM.bam,45,TTTATTAT,0,I,1,0,40 -exampleBAM.bam,33,59,1,M,1,0,40 -exampleBAM.bam,45,TCTATTCT,0,D,1,0,40 -exampleBAM.bam,45,TAACCTGG,0,I,1,0,40 -exampleBAM.bam,30,CA,0,M,3,0,40 -exampleBAM.bam,15,GG,0,M,2,0,40 -exampleBAM.bam,45,GACACAGC,0,I,1,0,40 -exampleBAM.bam,45,AACCTGGA,0,D,1,0,40 -exampleBAM.bam,45,4,1,I,5,0,40 -exampleBAM.bam,45,8,1,D,5,0,40 -exampleBAM.bam,25,AT,0,M,2,0,40 -exampleBAM.bam,6,63,1,M,2,0,40 -exampleBAM.bam,45,TTTGCAAT,0,D,1,0,40 -exampleBAM.bam,45,TTTGCACT,0,I,1,0,40 -exampleBAM.bam,45,TTAAGTGA,0,D,1,0,40 -exampleBAM.bam,45,TGAGTCAA,0,I,1,0,40 -exampleBAM.bam,22,59,1,M,1,0,40 -exampleBAM.bam,45,CTCGTCCA,0,D,1,0,40 -exampleBAM.bam,45,38,1,I,5,0,40 -exampleBAM.bam,45,42,1,D,5,0,40 -exampleBAM.bam,34,62,1,M,1,0,40 -exampleBAM.bam,31,CG,0,M,1,0,40 -exampleBAM.bam,31,8,1,M,2,0,40 -exampleBAM.bam,27,69,1,M,1,0,40 -exampleBAM.bam,26,3,1,M,1,0,40 -exampleBAM.bam,45,TATAAAGA,0,D,1,0,40 -exampleBAM.bam,45,GGGGTTGG,0,D,2,0,40 -exampleBAM.bam,45,64,1,I,5,0,40 -exampleBAM.bam,45,76,1,D,5,0,40 -exampleBAM.bam,45,GATTCTAT,0,D,1,0,40 -exampleBAM.bam,45,AGACACAG,0,I,1,0,40 -exampleBAM.bam,45,AGGGTTGG,0,D,1,0,40 -exampleBAM.bam,45,AGTGTTGG,0,D,1,0,40 -exampleBAM.bam,29,12,1,M,1,0,40 -exampleBAM.bam,29,GG,0,M,4,0,40 -exampleBAM.bam,8,71,1,M,1,0,40 -exampleBAM.bam,45,GTGAACTG,0,I,1,0,40 -exampleBAM.bam,45,TTGGCTTT,0,D,1,0,40 -exampleBAM.bam,9,69,1,M,1,0,40 -exampleBAM.bam,45,CCTGAAAG,0,I,1,0,40 -exampleBAM.bam,45,CTTTGCAC,0,D,1,0,40 -exampleBAM.bam,20,29,1,M,1,0,40 -exampleBAM.bam,12,40,1,M,1,0,40 -exampleBAM.bam,32,24,1,M,1,0,40 -exampleBAM.bam,21,61,1,M,1,0,40 -exampleBAM.bam,45,CATGGTAT,0,I,1,0,40 -exampleBAM.bam,45,GCACCCAG,0,D,1,0,40 -exampleBAM.bam,16,55,1,M,1,0,40 -exampleBAM.bam,45,ATGATCGT,0,D,1,0,40 -exampleBAM.bam,45,5,1,I,5,0,40 -exampleBAM.bam,45,9,1,D,5,0,40 -exampleBAM.bam,30,CC,0,M,2,0,40 -exampleBAM.bam,23,56,1,M,1,0,40 -exampleBAM.bam,6,62,1,M,1,0,40 -exampleBAM.bam,31,43,1,M,1,0,40 -exampleBAM.bam,25,AG,0,M,1,0,40 -exampleBAM.bam,45,ATAACCTG,0,D,1,0,40 -exampleBAM.bam,45,39,1,I,5,0,40 -exampleBAM.bam,45,43,1,D,5,0,40 -exampleBAM.bam,45,GAAAGTGC,0,D,1,0,40 -exampleBAM.bam,24,AA,0,M,1,0,40 -exampleBAM.bam,24,6,1,M,2,0,40 -exampleBAM.bam,45,TTATTGAT,0,I,1,0,40 -exampleBAM.bam,34,63,1,M,1,0,40 -exampleBAM.bam,31,CT,0,M,1,0,40 -exampleBAM.bam,45,65,1,I,5,0,40 -exampleBAM.bam,18,TT,0,M,1,1,1 -exampleBAM.bam,45,GATTTTTC,0,I,1,0,40 -exampleBAM.bam,45,AGTTCTAG,0,D,1,0,40 -exampleBAM.bam,45,TAAAGACA,0,I,1,0,40 -exampleBAM.bam,45,TGAGTGTT,0,I,1,0,40 -exampleBAM.bam,45,TTTCACAT,0,I,1,0,40 -exampleBAM.bam,45,GTGGAGCC,0,D,1,0,40 -exampleBAM.bam,19,49,1,M,1,0,40 -exampleBAM.bam,29,GT,0,M,2,0,40 -exampleBAM.bam,5,26,1,M,1,1,1 -exampleBAM.bam,45,AAGTGCAA,0,D,1,0,40 -exampleBAM.bam,45,ATTTGCAA,0,D,1,0,40 -exampleBAM.bam,45,ATCTAATC,0,I,1,0,40 -exampleBAM.bam,20,28,1,M,1,1,1 -exampleBAM.bam,45,GGTATTAC,0,I,1,0,40 -exampleBAM.bam,45,TGTGAACT,0,D,1,0,40 -exampleBAM.bam,45,TGGCCTGA,0,I,1,0,40 -exampleBAM.bam,33,57,1,M,1,0,40 -exampleBAM.bam,21,60,1,M,1,0,40 -exampleBAM.bam,29,47,1,M,1,0,40 -exampleBAM.bam,34,56,1,M,1,0,40 -exampleBAM.bam,31,GA,0,M,2,0,40 -exampleBAM.bam,45,TCGTCCAT,0,D,1,0,40 -exampleBAM.bam,45,TGATTCTA,0,I,1,0,40 -exampleBAM.bam,45,ATCCAGTT,0,D,1,0,40 -exampleBAM.bam,45,32,1,I,5,0,40 -exampleBAM.bam,45,44,1,D,5,0,40 -exampleBAM.bam,45,CATGATTC,0,D,1,0,40 -exampleBAM.bam,45,CAATCCAT,0,D,1,0,40 -exampleBAM.bam,45,CAGTTCTA,0,I,1,0,40 -exampleBAM.bam,34,26,1,M,1,0,40 -exampleBAM.bam,8,AT,0,M,1,1,1 -exampleBAM.bam,45,GGGTTAGG,0,D,2,0,40 -exampleBAM.bam,30,12,1,M,1,0,40 -exampleBAM.bam,45,TATATCAA,0,I,1,0,40 -exampleBAM.bam,45,GCAATCCA,0,D,1,0,40 -exampleBAM.bam,45,GGAGCCTT,0,D,1,0,40 -exampleBAM.bam,45,CAGATCCA,0,D,1,0,40 -exampleBAM.bam,45,2,1,I,5,0,40 -exampleBAM.bam,45,14,1,D,5,0,40 -exampleBAM.bam,45,GAGTGTTG,0,I,1,0,40 -exampleBAM.bam,32,30,1,M,1,0,40 -exampleBAM.bam,27,AC,0,M,1,0,40 -exampleBAM.bam,21,59,1,M,1,0,40 -exampleBAM.bam,45,TGTCTTTA,0,I,1,0,40 -exampleBAM.bam,45,TCAATGTG,0,I,1,0,40 -exampleBAM.bam,45,TGGCTTTA,0,I,1,0,40 -exampleBAM.bam,13,GA,0,M,1,0,40 -exampleBAM.bam,45,CCATGATT,0,D,1,0,40 -exampleBAM.bam,29,CA,0,M,1,0,40 -exampleBAM.bam,19,54,1,M,1,0,40 -exampleBAM.bam,45,TATCAATA,0,I,1,0,40 -exampleBAM.bam,45,TTTGGGCT,0,I,1,0,40 -exampleBAM.bam,45,TTGGTTAA,0,I,1,0,40 -exampleBAM.bam,45,TGCACTTT,0,D,1,0,40 -exampleBAM.bam,45,TCTAGAGT,0,I,1,0,40 -exampleBAM.bam,26,AT,0,M,1,0,40 -exampleBAM.bam,20,57,1,M,1,0,40 -exampleBAM.bam,45,GCCTCGTC,0,D,1,0,40 -exampleBAM.bam,45,70,1,I,5,0,40 -exampleBAM.bam,45,74,1,D,5,0,40 -exampleBAM.bam,18,22,1,M,1,0,40 -exampleBAM.bam,25,32,1,M,1,0,40 -exampleBAM.bam,27,66,1,M,1,0,40 -exampleBAM.bam,31,15,1,M,2,0,40 -exampleBAM.bam,31,GC,0,M,3,0,40 -exampleBAM.bam,45,33,1,I,5,0,40 -exampleBAM.bam,45,45,1,D,5,0,40 -exampleBAM.bam,45,GGAGATTA,0,D,1,0,40 -exampleBAM.bam,45,AGATCCAG,0,D,1,0,40 -exampleBAM.bam,16,19,1,M,1,0,40 -exampleBAM.bam,45,ATGGTATT,0,I,1,0,40 -exampleBAM.bam,45,ATCTCCAG,0,D,1,0,40 -exampleBAM.bam,13,75,1,M,1,0,40 -exampleBAM.bam,45,TTTGTATT,0,I,1,0,40 -exampleBAM.bam,45,TATCATGG,0,I,1,0,40 -exampleBAM.bam,45,TGACATGG,0,I,1,0,40 -exampleBAM.bam,17,TT,0,M,3,1,5 -exampleBAM.bam,31,45,1,M,1,0,40 -exampleBAM.bam,8,AG,0,M,2,0,40 -exampleBAM.bam,34,27,1,M,1,0,40 -exampleBAM.bam,45,3,1,I,5,0,40 -exampleBAM.bam,45,15,1,D,5,0,40 -exampleBAM.bam,45,TTATATCA,0,I,1,0,40 -exampleBAM.bam,45,TGATATAA,0,D,1,0,40 -exampleBAM.bam,45,GGTTATCA,0,I,1,0,40 -exampleBAM.bam,45,TCACTGAT,0,I,1,0,40 -exampleBAM.bam,45,GTGGCCTG,0,D,1,0,40 -exampleBAM.bam,19,21,1,M,2,0,40 -exampleBAM.bam,32,31,1,M,1,0,40 -exampleBAM.bam,27,AA,0,M,1,0,40 -exampleBAM.bam,45,CACTGATG,0,D,1,0,40 -exampleBAM.bam,45,ATAAAGAC,0,I,1,0,40 -exampleBAM.bam,45,GCACTTTC,0,I,1,0,40 -exampleBAM.bam,45,CAGCCTCG,0,I,1,0,40 -exampleBAM.bam,28,CT,0,M,2,0,40 -exampleBAM.bam,45,71,1,I,5,0,40 -exampleBAM.bam,45,75,1,D,5,0,40 -exampleBAM.bam,45,AGCAAAAT,0,I,1,0,40 -exampleBAM.bam,45,TTGCAATC,0,I,1,0,40 -exampleBAM.bam,33,29,1,M,2,0,40 -exampleBAM.bam,26,AG,0,M,1,0,40 -exampleBAM.bam,45,GGTTTGGG,0,D,2,0,40 -exampleBAM.bam,45,GGGTTGGG,0,D,3,0,40 -exampleBAM.bam,24,3,1,M,1,0,40 -exampleBAM.bam,45,TTTTTCTG,0,I,1,0,40 -exampleBAM.bam,45,TTAGATTT,0,D,1,0,40 -exampleBAM.bam,16,TG,0,M,2,0,40 -exampleBAM.bam,45,34,1,I,5,0,40 -exampleBAM.bam,45,46,1,D,5,0,40 -exampleBAM.bam,45,ATGAGTCA,0,D,1,0,40 -exampleBAM.bam,27,65,1,M,1,0,40 -exampleBAM.bam,31,12,1,M,1,0,40 -exampleBAM.bam,31,GG,0,M,4,0,40 -exampleBAM.bam,34,58,1,M,1,0,40 -exampleBAM.bam,24,33,1,M,1,0,40 -exampleBAM.bam,15,8,1,M,1,0,40 -exampleBAM.bam,26,67,1,M,1,0,40 -exampleBAM.bam,30,GA,0,M,2,0,40 -exampleBAM.bam,45,12,1,D,5,0,40 -exampleBAM.bam,45,GGCCTGAA,0,I,1,0,40 -exampleBAM.bam,45,AGATTAGA,0,D,1,0,40 -exampleBAM.bam,45,GCAGCCTC,0,D,1,0,40 -exampleBAM.bam,45,CATGGTGG,0,D,1,0,40 -exampleBAM.bam,45,AATCCATT,0,D,1,0,40 -exampleBAM.bam,45,CTTTATAT,0,D,1,0,40 -exampleBAM.bam,29,76,1,M,1,0,40 -exampleBAM.bam,23,61,1,M,1,0,40 -exampleBAM.bam,28,CA,0,M,2,0,40 -exampleBAM.bam,45,GTTAGGGT,0,I,3,0,40 -exampleBAM.bam,45,ACTCTTTG,0,I,1,0,40 -exampleBAM.bam,45,AGCCTTTG,0,I,1,0,40 -exampleBAM.bam,45,ACATGATC,0,D,1,0,40 -exampleBAM.bam,45,ATTATTGA,0,D,1,0,40 -exampleBAM.bam,32,28,1,M,2,0,40 -exampleBAM.bam,29,42,1,M,1,0,40 -exampleBAM.bam,27,AT,0,M,4,0,40 -exampleBAM.bam,45,TGGGTTAG,0,I,1,0,40 -exampleBAM.bam,45,TGGGTTCG,0,D,1,0,40 -exampleBAM.bam,26,7,1,M,1,0,40 -exampleBAM.bam,45,TTTTCTGT,0,I,1,0,40 -exampleBAM.bam,45,AGGGTTAG,0,I,1,0,40 -exampleBAM.bam,45,AGGGTTCG,0,D,1,0,40 -exampleBAM.bam,45,CGGGTTCG,0,D,1,0,40 -exampleBAM.bam,45,68,1,I,5,0,40 -exampleBAM.bam,45,72,1,D,5,0,40 -exampleBAM.bam,45,AGTCAATG,0,I,1,0,40 -exampleBAM.bam,29,8,1,M,1,0,40 -exampleBAM.bam,29,CG,0,M,2,0,40 -exampleBAM.bam,4,29,1,M,1,0,40 -exampleBAM.bam,16,TT,0,M,4,1,6 -exampleBAM.bam,45,CACCATGA,0,I,1,0,40 -exampleBAM.bam,45,35,1,I,5,0,40 -exampleBAM.bam,45,47,1,D,5,0,40 -exampleBAM.bam,45,CTATTCTT,0,I,1,0,40 -exampleBAM.bam,45,AATCTAAT,0,I,1,0,40 -exampleBAM.bam,45,GTGTTGGT,0,D,1,0,40 -exampleBAM.bam,30,45,1,M,1,0,40 -exampleBAM.bam,45,TCACATGA,0,I,1,0,40 -exampleBAM.bam,9,AG,0,M,1,0,40 -exampleBAM.bam,45,GTCCATGA,0,I,1,0,40 -exampleBAM.bam,31,13,1,M,1,0,40 -exampleBAM.bam,31,GT,0,M,1,0,40 -exampleBAM.bam,34,59,1,M,1,0,40 -exampleBAM.bam,45,AAGACACA,0,I,1,0,40 -exampleBAM.bam,45,CCACCATG,0,D,1,0,40 -exampleBAM.bam,45,1,1,I,5,0,40 -exampleBAM.bam,45,13,1,D,5,0,40 -exampleBAM.bam,16,51,1,M,1,0,40 -exampleBAM.bam,45,CGTCCATG,0,D,1,0,40 -exampleBAM.bam,45,CTGGGGTT,0,I,1,0,40 -exampleBAM.bam,45,GTTGGGTT,0,I,1,0,40 -exampleBAM.bam,45,TTCGGGTT,0,I,3,0,40 -exampleBAM.bam,45,TTAGGGTT,0,I,3,0,40 -exampleBAM.bam,45,TGGGGGTT,0,I,1,0,40 -exampleBAM.bam,45,TTTGGGTT,0,I,1,0,40 -exampleBAM.bam,45,TTGGGGTT,0,I,1,0,40 -exampleBAM.bam,9,38,1,M,1,0,40 -exampleBAM.bam,45,GTTATCAT,0,I,1,0,40 -exampleBAM.bam,30,GC,0,M,1,0,40 -exampleBAM.bam,17,TC,0,M,1,0,40 -exampleBAM.bam,34,25,1,M,1,0,40 -exampleBAM.bam,45,CCATGATA,0,D,1,0,40 -exampleBAM.bam,28,11,1,M,1,0,40 -exampleBAM.bam,45,TATTGATA,0,D,1,0,40 -exampleBAM.bam,29,43,1,M,1,0,40 -exampleBAM.bam,45,CCAGTTCT,0,D,1,0,40 -exampleBAM.bam,45,CAGGTTAT,0,I,1,0,40 -exampleBAM.bam,45,69,1,I,5,0,40 -exampleBAM.bam,45,73,1,D,5,0,40 -exampleBAM.bam,28,41,1,M,1,0,40 -exampleBAM.bam,33,31,1,M,1,0,40 -exampleBAM.bam,45,TGATCGTG,0,D,1,0,40 -exampleBAM.bam,29,9,1,M,1,0,40 -exampleBAM.bam,12,GC,0,M,1,0,40 -exampleBAM.bam,29,6,1,M,1,0,40 -exampleBAM.bam,45,GCCTCGTC,0,I,1,0,40 -exampleBAM.bam,45,70,1,D,5,0,40 -exampleBAM.bam,45,74,1,I,5,0,40 -exampleBAM.bam,45,TTTGGGCT,0,D,1,0,40 -exampleBAM.bam,45,TATCAATA,0,D,1,0,40 -exampleBAM.bam,33,TG,0,M,3,0,40 -exampleBAM.bam,45,TTGGTTAA,0,D,1,0,40 -exampleBAM.bam,45,TCTAGAGT,0,D,1,0,40 -exampleBAM.bam,45,TGCACTTT,0,I,1,0,40 -exampleBAM.bam,4,49,1,M,1,0,40 -exampleBAM.bam,32,18,1,M,1,0,40 -exampleBAM.bam,10,GT,0,M,1,0,40 -exampleBAM.bam,27,11,1,M,1,0,40 -exampleBAM.bam,27,CC,0,M,1,0,40 -exampleBAM.bam,45,CCATGATT,0,I,1,0,40 -exampleBAM.bam,5,TT,0,M,2,1,3 -exampleBAM.bam,18,56,1,M,1,0,40 -exampleBAM.bam,45,TGGCTTTA,0,D,1,0,40 -exampleBAM.bam,45,TGTCTTTA,0,D,1,0,40 -exampleBAM.bam,45,TCAATGTG,0,D,1,0,40 -exampleBAM.bam,12,68,1,M,1,0,40 -exampleBAM.bam,31,32,1,M,1,0,40 -exampleBAM.bam,45,GGAGCCTT,0,I,1,0,40 -exampleBAM.bam,45,CAGATCCA,0,I,1,0,40 -exampleBAM.bam,45,2,1,D,5,0,40 -exampleBAM.bam,45,14,1,I,5,0,40 -exampleBAM.bam,45,GCAATCCA,0,I,1,0,40 -exampleBAM.bam,22,TC,0,M,1,0,40 -exampleBAM.bam,45,GAGTGTTG,0,D,1,0,40 -exampleBAM.bam,15,AA,0,M,2,0,40 -exampleBAM.bam,45,GGGTTAGG,0,I,2,0,40 -exampleBAM.bam,45,TATATCAA,0,D,1,0,40 -exampleBAM.bam,17,62,1,M,1,0,40 -exampleBAM.bam,23,TT,0,M,1,0,40 -exampleBAM.bam,45,CATGATTC,0,I,1,0,40 -exampleBAM.bam,45,32,1,D,5,0,40 -exampleBAM.bam,45,44,1,I,5,0,40 -exampleBAM.bam,45,ATCCAGTT,0,I,1,0,40 -exampleBAM.bam,45,CAGTTCTA,0,D,1,0,40 -exampleBAM.bam,45,CAATCCAT,0,I,1,0,40 -exampleBAM.bam,45,TGATTCTA,0,D,1,0,40 -exampleBAM.bam,45,TCGTCCAT,0,I,1,0,40 -exampleBAM.bam,24,GT,0,M,2,0,40 -exampleBAM.bam,24,13,1,M,3,0,40 -exampleBAM.bam,30,34,1,M,1,0,40 -exampleBAM.bam,29,AC,0,M,1,0,40 -exampleBAM.bam,29,7,1,M,1,0,40 -exampleBAM.bam,32,49,1,M,1,0,40 -exampleBAM.bam,25,74,1,M,1,0,40 -exampleBAM.bam,27,40,1,M,1,0,40 -exampleBAM.bam,28,39,1,M,1,0,40 -exampleBAM.bam,45,TTGCAATC,0,D,1,0,40 -exampleBAM.bam,33,TT,0,M,4,0,40 -exampleBAM.bam,30,69,1,M,1,0,40 -exampleBAM.bam,45,71,1,D,5,0,40 -exampleBAM.bam,45,75,1,I,5,0,40 -exampleBAM.bam,45,AGCAAAAT,0,D,1,0,40 -exampleBAM.bam,32,19,1,M,1,0,40 -exampleBAM.bam,32,TC,0,M,3,0,40 -exampleBAM.bam,29,37,1,M,1,0,40 -exampleBAM.bam,27,CA,0,M,2,0,40 -exampleBAM.bam,45,ATAAAGAC,0,D,1,0,40 -exampleBAM.bam,45,CACTGATG,0,I,1,0,40 -exampleBAM.bam,45,CAGCCTCG,0,D,1,0,40 -exampleBAM.bam,45,GCACTTTC,0,D,1,0,40 -exampleBAM.bam,25,14,1,M,1,0,40 -exampleBAM.bam,34,23,1,M,1,0,40 -exampleBAM.bam,6,52,1,M,1,1,1 -exampleBAM.bam,45,TGATATAA,0,I,1,0,40 -exampleBAM.bam,45,GGTTATCA,0,D,1,0,40 -exampleBAM.bam,45,TTATATCA,0,D,1,0,40 -exampleBAM.bam,45,TCACTGAT,0,D,1,0,40 -exampleBAM.bam,45,GTGGCCTG,0,I,1,0,40 -exampleBAM.bam,45,3,1,D,5,0,40 -exampleBAM.bam,45,15,1,I,5,0,40 -exampleBAM.bam,17,63,1,M,1,0,40 -exampleBAM.bam,23,TG,0,M,1,0,40 -exampleBAM.bam,45,TTTGTATT,0,D,1,0,40 -exampleBAM.bam,24,GG,0,M,2,0,40 -exampleBAM.bam,30,35,1,M,2,0,40 -exampleBAM.bam,45,TATCATGG,0,D,1,0,40 -exampleBAM.bam,45,TGACATGG,0,D,1,0,40 -exampleBAM.bam,45,AGATCCAG,0,I,1,0,40 -exampleBAM.bam,45,33,1,D,5,0,40 -exampleBAM.bam,45,45,1,I,5,0,40 -exampleBAM.bam,45,GGAGATTA,0,I,1,0,40 -exampleBAM.bam,45,ATGGTATT,0,D,1,0,40 -exampleBAM.bam,45,ATCTCCAG,0,I,1,0,40 -exampleBAM.bam,45,CGGGTTCG,0,I,1,0,40 -exampleBAM.bam,45,AGGGTTAG,0,D,1,0,40 -exampleBAM.bam,45,AGGGTTCG,0,I,1,0,40 -exampleBAM.bam,45,68,1,D,5,0,40 -exampleBAM.bam,45,72,1,I,5,0,40 -exampleBAM.bam,45,AGTCAATG,0,D,1,0,40 -exampleBAM.bam,33,18,1,M,1,0,40 -exampleBAM.bam,33,TA,0,M,1,0,40 -exampleBAM.bam,45,TGGGTTAG,0,D,1,0,40 -exampleBAM.bam,45,TGGGTTCG,0,I,1,0,40 -exampleBAM.bam,45,TTTTCTGT,0,D,1,0,40 -exampleBAM.bam,4,TT,0,M,1,1,1 -exampleBAM.bam,29,4,1,M,1,0,40 -exampleBAM.bam,25,73,1,M,1,0,40 -exampleBAM.bam,45,AGCCTTTG,0,D,1,0,40 -exampleBAM.bam,45,ACTCTTTG,0,D,1,0,40 -exampleBAM.bam,18,58,1,M,1,1,1 -exampleBAM.bam,45,ATTATTGA,0,I,1,0,40 -exampleBAM.bam,45,ACATGATC,0,I,1,0,40 -exampleBAM.bam,28,AA,0,M,1,0,40 -exampleBAM.bam,33,48,1,M,1,0,40 -exampleBAM.bam,45,GTTAGGGT,0,D,3,0,40 -exampleBAM.bam,32,16,1,M,2,0,40 -exampleBAM.bam,32,TG,0,M,2,0,40 -exampleBAM.bam,45,GGCCTGAA,0,D,1,0,40 -exampleBAM.bam,45,12,1,I,5,0,40 -exampleBAM.bam,45,AGATTAGA,0,I,1,0,40 -exampleBAM.bam,45,GCAGCCTC,0,I,1,0,40 -exampleBAM.bam,45,AATCCATT,0,I,1,0,40 -exampleBAM.bam,45,CTTTATAT,0,I,1,0,40 -exampleBAM.bam,45,CATGGTGG,0,I,1,0,40 -exampleBAM.bam,22,TT,0,M,1,0,40 -exampleBAM.bam,24,45,1,M,1,0,40 -exampleBAM.bam,25,GT,0,M,3,0,40 -exampleBAM.bam,31,34,1,M,1,0,40 -exampleBAM.bam,34,20,1,M,1,0,40 -exampleBAM.bam,45,34,1,D,5,0,40 -exampleBAM.bam,45,46,1,I,5,0,40 -exampleBAM.bam,45,ATGAGTCA,0,I,1,0,40 -exampleBAM.bam,22,51,1,M,1,0,40 -exampleBAM.bam,45,TTTTTCTG,0,D,1,0,40 -exampleBAM.bam,45,GGGTTGGG,0,I,3,0,40 -exampleBAM.bam,45,GGTTTGGG,0,I,2,0,40 -exampleBAM.bam,45,TTAGATTT,0,I,1,0,40 -exampleBAM.bam,30,32,1,M,1,0,40 -exampleBAM.bam,23,19,1,M,1,0,40 -exampleBAM.bam,23,TC,0,M,1,0,40 -exampleBAM.bam,25,47,1,M,1,0,40 -exampleBAM.bam,10,75,1,M,1,0,40 -exampleBAM.bam,11,GG,0,M,1,0,40 -exampleBAM.bam,33,TC,0,M,6,0,40 -exampleBAM.bam,45,TGATCGTG,0,I,1,0,40 -exampleBAM.bam,45,CAGGTTAT,0,D,1,0,40 -exampleBAM.bam,45,CCAGTTCT,0,I,1,0,40 -exampleBAM.bam,45,69,1,D,5,0,40 -exampleBAM.bam,45,73,1,I,5,0,40 -exampleBAM.bam,32,51,1,M,1,0,40 -exampleBAM.bam,29,AT,0,M,2,0,40 -exampleBAM.bam,29,5,1,M,1,0,40 -exampleBAM.bam,33,49,1,M,1,0,40 -exampleBAM.bam,45,TATTGATA,0,I,1,0,40 -exampleBAM.bam,45,CCATGATA,0,I,1,0,40 -exampleBAM.bam,32,TT,0,M,2,0,40 -exampleBAM.bam,45,TGGGGGTT,0,D,1,0,40 -exampleBAM.bam,45,TTAGGGTT,0,D,3,0,40 -exampleBAM.bam,45,TTCGGGTT,0,D,3,0,40 -exampleBAM.bam,45,TTGGGGTT,0,D,1,0,40 -exampleBAM.bam,45,TTTGGGTT,0,D,1,0,40 -exampleBAM.bam,45,GTTGGGTT,0,D,1,0,40 -exampleBAM.bam,45,GTTATCAT,0,D,1,0,40 -exampleBAM.bam,45,CGTCCATG,0,I,1,0,40 -exampleBAM.bam,45,CCACCATG,0,I,1,0,40 -exampleBAM.bam,45,AAGACACA,0,D,1,0,40 -exampleBAM.bam,45,1,1,D,5,0,40 -exampleBAM.bam,45,13,1,I,5,0,40 -exampleBAM.bam,45,CTGGGGTT,0,D,1,0,40 -exampleBAM.bam,22,TG,0,M,3,0,40 -exampleBAM.bam,25,GG,0,M,2,0,40 -exampleBAM.bam,8,CA,0,M,1,0,40 -exampleBAM.bam,34,21,1,M,1,0,40 -exampleBAM.bam,24,GA,0,M,1,0,40 -exampleBAM.bam,45,GTGTTGGT,0,I,1,0,40 -exampleBAM.bam,45,TCACATGA,0,D,1,0,40 -exampleBAM.bam,45,GTCCATGA,0,D,1,0,40 -exampleBAM.bam,45,CACCATGA,0,D,1,0,40 -exampleBAM.bam,45,35,1,D,5,0,40 -exampleBAM.bam,45,47,1,I,5,0,40 -exampleBAM.bam,45,CTATTCTT,0,D,1,0,40 -exampleBAM.bam,45,AATCTAAT,0,D,1,0,40 -exampleBAM.bam,25,46,1,M,1,0,40 -exampleBAM.bam,27,76,1,M,1,0,40 -exampleBAM.bam,34,55,1,M,1,0,40 -exampleBAM.bam,31,1,1,M,1,0,40 -exampleBAM.bam,23,18,1,M,1,0,40 -exampleBAM.bam,31,66,1,M,1,0,40 -exampleBAM.bam,45,GAGATTAG,0,D,1,0,40 -exampleBAM.bam,45,TTCAGGCC,0,D,1,0,40 -exampleBAM.bam,13,AA,0,M,1,0,40 -exampleBAM.bam,45,GGTTAATG,0,D,1,0,40 -exampleBAM.bam,45,GGTGGAGC,0,D,1,0,40 -exampleBAM.bam,21,TT,0,M,1,0,40 -exampleBAM.bam,21,17,1,M,1,0,40 -exampleBAM.bam,12,AG,0,M,1,0,40 -exampleBAM.bam,45,GGCCACCA,0,I,1,0,40 -exampleBAM.bam,45,GCTGGGGT,0,D,1,0,40 -exampleBAM.bam,45,CTTGGCTT,0,I,1,0,40 -exampleBAM.bam,45,66,1,D,5,0,40 -exampleBAM.bam,26,GT,0,M,1,0,40 -exampleBAM.bam,45,TAATCTCC,0,D,1,0,40 -exampleBAM.bam,45,GTTGGGGT,0,D,1,0,40 -exampleBAM.bam,28,34,1,M,1,0,40 -exampleBAM.bam,45,TTGGGGGT,0,D,1,0,40 -exampleBAM.bam,17,58,1,M,1,0,40 -exampleBAM.bam,31,6,1,M,1,0,40 -exampleBAM.bam,45,CCTTTGCA,0,I,1,0,40 -exampleBAM.bam,45,36,1,D,5,0,40 -exampleBAM.bam,45,40,1,I,5,0,40 -exampleBAM.bam,45,CAGGCACC,0,D,1,0,40 -exampleBAM.bam,45,GTTCTAGA,0,I,1,0,40 -exampleBAM.bam,45,TATTTGCA,0,I,1,0,40 -exampleBAM.bam,34,TA,0,M,1,0,40 -exampleBAM.bam,25,CC,0,M,1,0,40 -exampleBAM.bam,22,23,1,M,1,0,40 -exampleBAM.bam,45,GAACTGGG,0,I,1,0,40 -exampleBAM.bam,45,6,1,D,5,0,40 -exampleBAM.bam,45,10,1,I,5,0,40 -exampleBAM.bam,45,GGGCTGGG,0,I,1,0,40 -exampleBAM.bam,45,TTGATATA,0,D,1,0,40 -exampleBAM.bam,45,TTCTTAAG,0,D,1,0,40 -exampleBAM.bam,27,GA,0,M,2,0,40 -exampleBAM.bam,27,14,1,M,1,0,40 -exampleBAM.bam,32,23,1,M,1,0,40 -exampleBAM.bam,21,50,1,M,1,0,40 -exampleBAM.bam,45,TAACCTGG,0,D,1,0,40 -exampleBAM.bam,45,TCTATTCT,0,I,1,0,40 -exampleBAM.bam,11,40,1,M,1,1,1 -exampleBAM.bam,45,TTTATTAT,0,D,1,0,40 -exampleBAM.bam,45,ATGATTCT,0,I,1,0,40 -exampleBAM.bam,45,CCTGGAGA,0,D,1,0,40 -exampleBAM.bam,45,GCCAGGCA,0,D,1,0,40 -exampleBAM.bam,12,AT,0,M,1,0,40 -exampleBAM.bam,32,53,1,M,1,0,40 -exampleBAM.bam,21,TG,0,M,3,0,40 -exampleBAM.bam,26,GG,0,M,1,0,40 -exampleBAM.bam,45,TCTGTGTC,0,D,1,0,40 -exampleBAM.bam,45,GTTGGGGG,0,D,1,0,40 -exampleBAM.bam,45,TTGGGCTG,0,I,1,0,40 -exampleBAM.bam,45,AAATCTAA,0,I,1,0,40 -exampleBAM.bam,45,67,1,D,5,0,40 -exampleBAM.bam,45,CTGGAGAT,0,D,1,0,40 -exampleBAM.bam,45,AGATTTTT,0,D,1,0,40 -exampleBAM.bam,45,AGGCACCC,0,I,1,0,40 -exampleBAM.bam,45,CTGAAAGT,0,I,1,0,40 -exampleBAM.bam,8,46,1,M,1,0,40 -exampleBAM.bam,45,TCCAGGTT,0,D,1,0,40 -exampleBAM.bam,45,GTGAGTGT,0,I,1,0,40 -exampleBAM.bam,24,CG,0,M,1,0,40 -exampleBAM.bam,45,TTATCATG,0,I,1,0,40 -exampleBAM.bam,45,ACAGCAAA,0,I,1,0,40 -exampleBAM.bam,45,37,1,D,5,0,40 -exampleBAM.bam,45,41,1,I,5,0,40 -exampleBAM.bam,45,AGTGCAAA,0,I,1,0,40 -exampleBAM.bam,34,TC,0,M,3,0,40 -exampleBAM.bam,25,CA,0,M,1,0,40 -exampleBAM.bam,30,AT,0,M,1,0,40 -exampleBAM.bam,45,TTTATATC,0,D,1,0,40 -exampleBAM.bam,45,TTACTCTT,0,D,1,0,40 -exampleBAM.bam,45,GTATTACT,0,I,1,0,40 -exampleBAM.bam,45,TGGTTAAT,0,D,1,0,40 -exampleBAM.bam,45,7,1,D,5,0,40 -exampleBAM.bam,45,11,1,I,5,0,40 -exampleBAM.bam,45,CCTGAAAG,0,D,1,0,40 -exampleBAM.bam,45,CTTTGCAC,0,I,1,0,40 -exampleBAM.bam,45,GTGAACTG,0,D,1,0,40 -exampleBAM.bam,45,TTGGCTTT,0,I,1,0,40 -exampleBAM.bam,28,2,1,M,1,0,40 -exampleBAM.bam,19,30,1,M,1,0,40 -exampleBAM.bam,27,GT,0,M,1,0,40 -exampleBAM.bam,45,64,1,D,5,0,40 -exampleBAM.bam,45,76,1,I,5,0,40 -exampleBAM.bam,45,AGTGTTGG,0,I,1,0,40 -exampleBAM.bam,45,AGGGTTGG,0,I,1,0,40 -exampleBAM.bam,45,GATTCTAT,0,I,1,0,40 -exampleBAM.bam,45,AGACACAG,0,D,1,0,40 -exampleBAM.bam,45,GGGGTTGG,0,I,2,0,40 -exampleBAM.bam,15,68,1,M,1,0,40 -exampleBAM.bam,45,TATAAAGA,0,I,1,0,40 -exampleBAM.bam,33,22,1,M,2,0,40 -exampleBAM.bam,12,AA,0,M,1,0,40 -exampleBAM.bam,32,54,1,M,1,0,40 -exampleBAM.bam,45,CTCGTCCA,0,I,1,0,40 -exampleBAM.bam,45,38,1,D,5,0,40 -exampleBAM.bam,45,42,1,I,5,0,40 -exampleBAM.bam,45,TTAAGTGA,0,I,1,0,40 -exampleBAM.bam,45,TTTGCAAT,0,I,1,0,40 -exampleBAM.bam,45,TTTGCACT,0,D,1,0,40 -exampleBAM.bam,24,CC,0,M,2,0,40 -exampleBAM.bam,45,TGAGTCAA,0,D,1,0,40 -exampleBAM.bam,6,TT,0,M,2,1,3 -exampleBAM.bam,31,4,1,M,1,0,40 -exampleBAM.bam,31,AG,0,M,2,0,40 -exampleBAM.bam,34,50,1,M,1,0,40 -exampleBAM.bam,27,73,1,M,1,0,40 -exampleBAM.bam,45,GACACAGC,0,D,1,0,40 -exampleBAM.bam,45,AACCTGGA,0,I,1,0,40 -exampleBAM.bam,45,4,1,D,5,0,40 -exampleBAM.bam,45,8,1,I,5,0,40 -exampleBAM.bam,16,58,1,M,1,0,40 -exampleBAM.bam,30,AA,0,M,2,0,40 -exampleBAM.bam,24,41,1,M,1,0,40 -exampleBAM.bam,34,TG,0,M,3,0,40 -exampleBAM.bam,29,68,1,M,1,0,40 -exampleBAM.bam,25,9,1,M,1,0,40 -exampleBAM.bam,26,44,1,M,1,0,40 -exampleBAM.bam,45,GGTATTAC,0,D,1,0,40 -exampleBAM.bam,45,TGTGAACT,0,I,1,0,40 -exampleBAM.bam,45,TGGCCTGA,0,D,1,0,40 -exampleBAM.bam,5,22,1,M,1,0,40 -exampleBAM.bam,45,AAGTGCAA,0,I,1,0,40 -exampleBAM.bam,45,ATTTGCAA,0,I,1,0,40 -exampleBAM.bam,45,ATCTAATC,0,D,1,0,40 -exampleBAM.bam,27,GG,0,M,1,0,40 -exampleBAM.bam,21,48,1,M,1,0,40 -exampleBAM.bam,45,TGAGTGTT,0,D,1,0,40 -exampleBAM.bam,13,39,1,M,1,0,40 -exampleBAM.bam,45,TAAAGACA,0,D,1,0,40 -exampleBAM.bam,33,23,1,M,1,0,40 -exampleBAM.bam,45,GTGGAGCC,0,I,1,0,40 -exampleBAM.bam,45,TTTCACAT,0,D,1,0,40 -exampleBAM.bam,45,65,1,D,5,0,40 -exampleBAM.bam,45,GATTTTTC,0,D,1,0,40 -exampleBAM.bam,45,AGTTCTAG,0,I,1,0,40 -exampleBAM.bam,19,61,1,M,1,0,40 -exampleBAM.bam,28,71,1,M,1,0,40 -exampleBAM.bam,15,35,1,M,1,0,40 -exampleBAM.bam,24,CA,0,M,1,0,40 -exampleBAM.bam,24,10,1,M,1,1,1 -exampleBAM.bam,45,TTATTGAT,0,D,1,0,40 -exampleBAM.bam,45,ATAACCTG,0,I,1,0,40 -exampleBAM.bam,45,GAAAGTGC,0,I,1,0,40 -exampleBAM.bam,45,39,1,D,5,0,40 -exampleBAM.bam,45,43,1,I,5,0,40 -exampleBAM.bam,31,AT,0,M,2,0,40 -exampleBAM.bam,31,5,1,M,1,0,40 -exampleBAM.bam,34,51,1,M,1,0,40 -exampleBAM.bam,27,72,1,M,1,0,40 -exampleBAM.bam,30,AC,0,M,1,0,40 -exampleBAM.bam,45,CATGGTAT,0,D,1,0,40 -exampleBAM.bam,45,ATGATCGT,0,I,1,0,40 -exampleBAM.bam,45,5,1,D,5,0,40 -exampleBAM.bam,45,9,1,I,5,0,40 -exampleBAM.bam,45,GCACCCAG,0,I,1,0,40 -exampleBAM.bam,34,TT,0,M,6,0,40 -exampleBAM.bam,31,39,1,M,2,0,40 -exampleBAM.bam,14,33,1,M,1,0,40 -EOF From 0509d316d921d99fd37055a4f5efa60c5cca2bec Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 23 Mar 2012 16:15:19 -0400 Subject: [PATCH 27/29] More information in the recalibration report * added empirical quality counts to allow quantization during on-the-fly recalibration to any level * added number of observations and errors to all tables to enable plotting of all covariates --- .../sting/gatk/walkers/bqsr/RecalDataManager.java | 5 ++--- .../sting/utils/recalibration/BaseRecalibration.java | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java index 742be4bbd..a2edd2806 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java @@ -63,12 +63,13 @@ public class RecalDataManager { public final static String ARGUMENT_VALUE_COLUMN_NAME = "Value"; public final static String QUANTIZED_VALUE_COLUMN_NAME = "QuantizedScore"; + public static final String QUANTIZED_COUNT_COLUMN_NAME = "Count"; public final static String READGROUP_COLUMN_NAME = "ReadGroup"; public final static String EVENT_TYPE_COLUMN_NAME = "EventType"; public final static String EMPIRICAL_QUALITY_COLUMN_NAME = "EmpiricalQuality"; public final static String ESTIMATED_Q_REPORTED_COLUMN_NAME = "EstimatedQReported"; public final static String QUALITY_SCORE_COLUMN_NAME = "QualityScore"; - public final static String COVARIATE_VALUE_SCORE_COLUMN_NAME = "CovariateValue"; + public final static String COVARIATE_VALUE_COLUMN_NAME = "CovariateValue"; public final static String COVARIATE_NAME_COLUMN_NAME = "CovariateName"; public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams @@ -77,8 +78,6 @@ public class RecalDataManager { private static boolean warnUserNullPlatform = false; - - public enum SOLID_RECAL_MODE { /** * Treat reference inserted bases as reference matching bases. Very unsafe! diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index a000732c4..cf44e7c36 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -124,7 +124,7 @@ public class BaseRecalibration { ArrayList columnNamesOrderedList = new ArrayList(5); columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); - columnNamesOrderedList.add(RecalDataManager.COVARIATE_VALUE_SCORE_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME); columnNamesOrderedList.add(RecalDataManager.COVARIATE_NAME_COLUMN_NAME); columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList); From b063bcd38d2fccf9eea8296d9bc7357edbea9668 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Mar 2012 21:02:21 -0400 Subject: [PATCH 28/29] Removing update0 support in VariantEval -- Now the only use for update0, calculating the number of processed loci, is centrally tracked in the walker itself not the evaluations. -- This allows us to avoid calling update0 are every genomic base in 100ks of evaluates when there are a lot of stratifications. -- No need to modify the integration tests, this optimization doesn't change the result of the calculation --- .../varianteval/VariantEvalWalker.java | 17 +++++++--- .../varianteval/evaluators/CountVariants.java | 5 +-- .../evaluators/GenotypePhasingEvaluator.java | 9 +---- .../MendelianViolationEvaluator.java | 23 +------------ .../evaluators/MultiallelicSummary.java | 10 +++--- .../evaluators/VariantEvaluator.java | 33 +++++++++---------- .../evaluators/VariantSummary.java | 16 ++++----- .../util/NewEvaluationContext.java | 12 +------ 8 files changed, 42 insertions(+), 83 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 4fc7a1f41..2b9f159ac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -214,6 +214,9 @@ public class VariantEvalWalker extends RodWalker implements Tr // Public constants private static String ALL_SAMPLE_NAME = "all"; + // the number of processed bp for this walker + long nProcessedLoci = 0; + // Utility class private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this); @@ -326,10 +329,10 @@ public class VariantEvalWalker extends RodWalker implements Tr */ @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - for ( final NewEvaluationContext nec : evaluationContexts.values() ) { - synchronized (nec) { - nec.update0(tracker, ref, context); - } + // we track the processed bp and expose this for modules instead of wasting CPU power on calculating + // the same thing over and over in evals that want the processed bp + synchronized (this) { + nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); } if (tracker != null) { @@ -455,7 +458,7 @@ public class VariantEvalWalker extends RodWalker implements Tr if ( lenientMatch == null ) lenientMatch = comp; break; case NO_MATCH: - ; + // do nothing } } @@ -581,6 +584,10 @@ public class VariantEvalWalker extends RodWalker implements Tr public Set getJexlExpressions() { return jexlExpressions; } + public long getnProcessedLoci() { + return nProcessedLoci; + } + public Set getContigNames() { final TreeSet contigs = new TreeSet(); for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index 6fc4208ee..3a2635121 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -89,10 +89,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval { return 1; // we only need to see each eval track } - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); - } - public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { nCalledLoci++; @@ -192,6 +188,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval { } public void finalizeEvaluation() { + nProcessedLoci = getWalker().getnProcessedLoci(); variantRate = perLocusRate(nVariantLoci); variantRatePerBp = perLocusRInverseRate(nVariantLoci); heterozygosity = perLocusRate(nHets); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java index 2f9671d90..41979798e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java @@ -60,6 +60,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator { double minPhaseQuality = 10.0; public void initialize(VariantEvalWalker walker) { + super.initialize(walker); this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality()); this.samplePrevGenotypes = new SamplePreviousGenotypes(); } @@ -294,14 +295,6 @@ class CompEvalGenotypes { public Genotype getEvalGenotype() { return evalGt; } - - public void setCompGenotype(Genotype compGt) { - this.compGt = compGt; - } - - public void setEvalGenotype(Genotype evalGt) { - this.evalGt = evalGt; - } } class SamplePreviousGenotypes { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index 7f3bf6290..db2bf61c6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.PrintStream; import java.util.Map; import java.util.Set; @@ -60,17 +59,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator { @DataPoint(description = "Number of mendelian violations found", format = "%d") long nViolations; - - /*@DataPoint(description = "number of child hom ref calls where the parent was hom variant", format = "%d") - long KidHomRef_ParentHomVar; - @DataPoint(description = "number of child het calls where the parent was hom ref", format = "%d") - long KidHet_ParentsHomRef; - @DataPoint(description = "number of child het calls where the parent was hom variant", format = "%d") - long KidHet_ParentsHomVar; - @DataPoint(description = "number of child hom variant calls where the parent was hom ref", format = "%d") - long KidHomVar_ParentHomRef; - */ - @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR", format = "%d") long mvRefRef_Var; @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET", format = "%d") @@ -88,12 +76,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator { @DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET", format = "%d") long mvVarVar_Het; - - /*@DataPoint(description ="Number of inherited var alleles from het parents", format = "%d") - long nInheritedVar; - @DataPoint(description ="Number of inherited ref alleles from het parents", format = "%d") - long nInheritedRef;*/ - @DataPoint(description="Number of HomRef/HomRef/HomRef trios", format = "%d") long HomRefHomRef_HomRef; @DataPoint(description="Number of Het/Het/Het trios", format = "%d") @@ -120,18 +102,15 @@ public class MendelianViolationEvaluator extends VariantEvaluator { long HomVarHet_inheritedVar; MendelianViolation mv; - PrintStream mvFile; Map> families; public void initialize(VariantEvalWalker walker) { - //Changed by Laurent Francioli - 2011-06-07 - //mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold()); + super.initialize(walker); mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false); families = walker.getSampleDB().getFamilies(); } public boolean enabled() { - //return getVEWalker().FAMILY_STRUCTURE != null; return true; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java index 1c34be4a1..90c2def0b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java @@ -28,11 +28,12 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @Analysis(description = "Evaluation summary for multi-allelic variants") public class MultiallelicSummary extends VariantEvaluator implements StandardEval { @@ -90,10 +91,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva @Override public boolean enabled() { return true; } @Override public int getComparisonOrder() { return 2; } - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); - } - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( eval == null || eval.isMonomorphicInSamples() ) return null; @@ -152,6 +149,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva } public void finalizeEvaluation() { + nProcessedLoci = getWalker().getnProcessedLoci(); processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci; variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs; processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index 7e5cf37ff..d5cf685de 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -4,14 +4,18 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Collection; - public abstract class VariantEvaluator { - public void initialize(VariantEvalWalker walker) {} + private VariantEvalWalker walker; + + public void initialize(VariantEvalWalker walker) { + this.walker = walker; + } + + public VariantEvalWalker getWalker() { + return walker; + } public abstract boolean enabled(); @@ -19,9 +23,8 @@ public abstract class VariantEvaluator { public abstract int getComparisonOrder(); // called at all sites, regardless of eval context itself; useful for counting processed bases - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - - } + // No longer available. The processed bp is kept in VEW itself for performance reasons + // public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return null; @@ -45,17 +48,13 @@ public abstract class VariantEvaluator { return ((double)num) / (Math.max(denom, 1)); } - public boolean stateIsApplicable(StateKey stateKey) { - return true; - } - /** * Returns true if the variant in vc was a singleton in the original input evaluation * set, regardless of variant context subsetting that has occurred. - * @param eval + * @param eval the VariantContext being assessed for this previous status as a singleton * @return true if eval was originally a singleton site */ - protected static final boolean variantWasSingleton(final VariantContext eval) { + protected static boolean variantWasSingleton(final VariantContext eval) { return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false); } @@ -66,7 +65,7 @@ public abstract class VariantEvaluator { * @param all number of all variants * @return a String novelty rate, or NA if all == 0 */ - protected static final String formattedNoveltyRate(final int known, final int all) { + protected static String formattedNoveltyRate(final int known, final int all) { return formattedPercent(all - known, all); } @@ -77,7 +76,7 @@ public abstract class VariantEvaluator { * @param total count of all objects, including x * @return a String percent rate, or NA if total == 0 */ - protected static final String formattedPercent(final int x, final int total) { + protected static String formattedPercent(final int x, final int total) { return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total)); } @@ -88,7 +87,7 @@ public abstract class VariantEvaluator { * @param denom number of observations in the denumerator * @return a String formatted ratio, or NA if all == 0 */ - protected static final String formattedRatio(final int num, final int denom) { + protected static String formattedRatio(final int num, final int denom) { return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom)); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java index 31f9a4f78..64161ac34 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java @@ -49,7 +49,6 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { /** Indels with size greater than this value are tallied in the CNV column */ private final static int MAX_INDEL_LENGTH = 50; private final static double MIN_CNV_OVERLAP = 0.5; - private VariantEvalWalker walker; public enum Type { SNP, INDEL, CNV @@ -152,7 +151,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { public void initialize(VariantEvalWalker walker) { - this.walker = walker; + super.initialize(walker); nSamples = walker.getSampleNamesForEvaluation().size(); countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation()); @@ -176,11 +175,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { return 2; // we only need to see each eval track } - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); - } - - private final Type getType(VariantContext vc) { + private Type getType(VariantContext vc) { switch (vc.getType()) { case SNP: return Type.SNP; @@ -196,9 +191,9 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { } } - private final boolean overlapsKnownCNV(VariantContext cnv) { + private boolean overlapsKnownCNV(VariantContext cnv) { if ( knownCNVs != null ) { - final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true); + final GenomeLoc loc = getWalker().getGenomeLocParser().createGenomeLoc(cnv, true); IntervalTree intervalTree = knownCNVs.get(loc.getContig()); final Iterator> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop()); @@ -252,13 +247,14 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { return null; // we don't capture any interesting sites } - private final String noveltyRate(Type type) { + private String noveltyRate(Type type) { final int all = allVariantCounts.all(type); final int known = knownVariantCounts.all(type); return formattedNoveltyRate(known, all); } public void finalizeEvaluation() { + nProcessedLoci = getWalker().getnProcessedLoci(); nSNPs = allVariantCounts.all(Type.SNP); nIndels = allVariantCounts.all(Type.INDEL); nSVs = allVariantCounts.all(Type.CNV); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java index f9d8e437b..09f2c0168 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java @@ -23,9 +23,7 @@ public class NewEvaluationContext extends HashMap { final VariantEvaluator eval = c.newInstance(); eval.initialize(walker); - if (eval.stateIsApplicable(stateKey)) { - evaluationInstances.put(c.getSimpleName(), eval); - } + evaluationInstances.put(c.getSimpleName(), eval); } catch (InstantiationException e) { throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'"); } catch (IllegalAccessException e) { @@ -40,8 +38,6 @@ public class NewEvaluationContext extends HashMap { public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) { for ( final VariantEvaluator evaluation : evaluationInstances.values() ) { - // we always call update0 in case the evaluation tracks things like number of bases covered - // the other updateN methods don't see a null context if ( tracker == null ) continue; @@ -65,10 +61,4 @@ public class NewEvaluationContext extends HashMap { } } } - - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - for ( final VariantEvaluator evaluation : evaluationInstances.values() ) { - evaluation.update0(tracker, ref, context); - } - } }