diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R index 64fbcc50a..4c228ccb4 100644 --- a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R +++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R @@ -99,69 +99,77 @@ gsa.read.gatkreportv0 <- function(lines) { # Load all GATKReport v1 tables from file gsa.read.gatkreportv1 <- function(lines) { + #print("loading with optimized v1 reader") + nLines = length(lines) + tableEnv = new.env(); + + tableName = NA; + tableHeader = c(); + tableRows = NULL; + version = ""; + rowCount = 0 + headerRowCount = -1; + + finishTable <- function() { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows[1:rowCount,], tableEnv); + } + + for (line in lines) { - tableEnv = new.env(); - - tableName = NA; - tableHeader = c(); - tableRows = c(); - version = ""; - headerRowCount = -1; - - for (line in lines) { - - if (length(grep("^#:GATKReport.v1", line, ignore.case=TRUE)) > 0) { - version = "v1.0"; - headerRowCount = 0; - } - - if ( (headerRowCount %% 2 == 1) && (version == "v1.0") ) { - #print("Trying to start a table with line:"); - #print(line); - - #Get table header - headerFields = unlist(strsplit(line, ":")); - - if (!is.na(tableName)) { - .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); - } - - tableName = headerFields[3]; - tableHeader = c(); - tableRows = c(); - - columnStarts = c(); - - } - - if (length(grep("^#:GATKTable", line, ignore.case=TRUE)) > 0) { - headerRowCount = headerRowCount+1; - #print("Header Row count is at:") - #print(headerRowCount); - } else if (!is.na(tableName)) { - if ( version == "v1.0") { - if (length(tableHeader) == 0) { - headerChars = unlist(strsplit(line, "")); - # Find the first position of non space characters, excluding the first character - columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1); - } - - row = .gsa.splitFixedWidth(line, columnStarts); - } - - if (length(tableHeader) == 0) { - tableHeader = row; - } else if ( nchar(line) > 0 ) { - tableRows = rbind(tableRows, row); - } - } + if (length(grep("^#:GATKReport.v1", line, ignore.case=TRUE)) > 0) { + version = "v1.0"; + headerRowCount = 0; } - if (!is.na(tableName)) { - .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + if ( (headerRowCount %% 2 == 1) && (version == "v1.0") ) { + #print("Trying to start a table with line:"); + #print(line); + + #Get table header + headerFields = unlist(strsplit(line, ":")); + + if (!is.na(tableName)) { + finishTable() + } + + tableName = headerFields[3]; + tableHeader = c(); + tableRows = NULL + rowCount = 0 + + columnStarts = c(); } - gatkreport = as.list(tableEnv, all.names=TRUE); + if (length(grep("^#:GATKTable", line, ignore.case=TRUE)) > 0) { + headerRowCount = headerRowCount+1; + #print("Header Row count is at:") + #print(headerRowCount); + } else if (!is.na(tableName)) { + if ( version == "v1.0") { + if (length(tableHeader) == 0) { + headerChars = unlist(strsplit(line, "")); + # Find the first position of non space characters, excluding the first character + columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1); + tableRows = matrix(nrow=nLines, ncol=length(columnStarts)+1); + } + + row = .gsa.splitFixedWidth(line, columnStarts); + } + + if (length(tableHeader) == 0) { + tableHeader = row; + } else if ( nchar(line) > 0 ) { + rowCount = rowCount + 1 + tableRows[rowCount,] <- row + } + } + } + + if (!is.na(tableName)) { + finishTable() + } + + gatkreport = as.list(tableEnv, all.names=TRUE); } # Load all GATKReport tables from a file diff --git a/public/R/titvFPEst.R b/public/R/titvFPEst.R deleted file mode 100755 index 7af5e8bbb..000000000 --- a/public/R/titvFPEst.R +++ /dev/null @@ -1,138 +0,0 @@ -titvFPEst <- function(titvExpected, titvObserved) { max(min(1 - (titvObserved - 0.5) / (titvExpected - 0.5), 1), 0.001) } - -titvFPEstV <- function(titvExpected, titvs) { - sapply(titvs, function(x) titvFPEst(titvExpected, x)) -} - -calcHet <- function(nknown, knownTiTv, nnovel, novelTiTv, callable) { - TP <- nknown + (1-titvFPEst(knownTiTv, novelTiTv)) * nnovel - 2 * TP / 3 / callable -} - -marginalTiTv <- function( nx, titvx, ny, titvy ) { - tvx = nx / (titvx + 1) - tix = nx - tvx - tvy = ny / (titvy + 1) - tiy = ny - tvy - tiz = tix - tiy - tvz = tvx - tvy - return(tiz / tvz) -} -marginaldbSNPRate <- function( nx, dbx, ny, dby ) { - knownx = nx * dbx / 100 - novelx = nx - knownx - knowny = ny * dby / 100 - novely = ny - knowny - knownz = knownx - knowny - novelz = novelx - novely - return(knownz / ( knownz + novelz ) * 100) -} - -numExpectedCalls <- function(L, theta, calledFractionOfRegion, nIndividuals, dbSNPRate) { - nCalls <- L * theta * calledFractionOfRegion * sum(1 / seq(1, 2 * nIndividuals)) - return(list(nCalls = nCalls, nKnown = dbSNPRate * nCalls, nNovel = (1-dbSNPRate) * nCalls)) -} - -normalize <- function(x) { - x / sum(x) -} - -normcumsum <- function(x) { - cumsum(normalize(x)) -} - -cumhist <- function(d, ...) { - plot(d[order(d)], type="b", col="orange", lwd=2, ...) -} - -revcumsum <- function(x) { - return(rev(cumsum(rev(x)))) -} - -phred <- function(x) { - log10(max(x,10^(-9.9)))*-10 -} - -pOfB <- function(b, B, Q) { - #print(paste(b, B, Q)) - p = 1 - 10^(-Q/10) - if ( b == B ) - return(p) - else - return(1 - p) -} - -pOfG <- function(bs, qs, G) { - a1 = G[1] - a2 = G[2] - - log10p = 0 - for ( i in 1:length(bs) ) { - b = bs[i] - q = qs[i] - p1 = pOfB(b, a1, q) / 2 + pOfB(b, a2, q) / 2 - log10p = log10p + log10(p1) - } - - return(log10p) -} - -pOfGs <- function(nAs, nBs, Q) { - bs = c(rep("a", nAs), rep("t", nBs)) - qs = rep(Q, nAs + nBs) - G1 = c("a", "a") - G2 = c("a", "t") - G3 = c("t", "t") - - log10p1 = pOfG(bs, qs, G1) - log10p2 = pOfG(bs, qs, G2) - log10p3 = pOfG(bs, qs, G3) - Qsample = phred(1 - 10^log10p2 / sum(10^(c(log10p1, log10p2, log10p3)))) - - return(list(p1=log10p1, p2=log10p2, p3=log10p3, Qsample=Qsample)) -} - -QsampleExpected <- function(depth, Q) { - weightedAvg = 0 - for ( d in 1:(depth*3) ) { - Qsample = 0 - pOfD = dpois(d, depth) - for ( nBs in 0:d ) { - pOfnB = dbinom(nBs, d, 0.5) - nAs = d - nBs - Qsample = pOfGs(nAs, nBs, Q)$Qsample - #Qsample = 1 - weightedAvg = weightedAvg + Qsample * pOfD * pOfnB - print(as.data.frame(list(d=d, nBs = nBs, pOfD=pOfD, pOfnB = pOfnB, Qsample=Qsample, weightedAvg = weightedAvg))) - } - } - - return(weightedAvg) -} - -plotQsamples <- function(depths, Qs, Qmax) { - cols = rainbow(length(Qs)) - plot(depths, rep(Qmax, length(depths)), type="n", ylim=c(0,Qmax), xlab="Average sequencing coverage", ylab="Qsample", main = "Expected Qsample values, including depth and allele sampling") - - for ( i in 1:length(Qs) ) { - Q = Qs[i] - y = as.numeric(lapply(depths, function(x) QsampleExpected(x, Q))) - points(depths, y, col=cols[i], type="b") - } - - legend("topleft", paste("Q", Qs), fill=cols) -} - -pCallHetGivenDepth <- function(depth, nallelesToCall) { - depths = 0:(2*depth) - pNoAllelesToCall = apply(as.matrix(depths),1,function(d) sum(dbinom(0:nallelesToCall,d,0.5))) - dpois(depths,depth)*(1-pNoAllelesToCall) -} - -pCallHets <- function(depth, nallelesToCall) { - sum(pCallHetGivenDepth(depth,nallelesToCall)) -} - -pCallHetMultiSample <- function(depth, nallelesToCall, nsamples) { - 1-(1-pCallHets(depth,nallelesToCall))^nsamples -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 433c7d82f..1cea14a9d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -47,7 +47,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar /** * An exception that's occurred in this traversal. If null, no exception has occurred. */ - private Throwable error = null; + private RuntimeException error = null; /** * Queue of incoming shards. @@ -345,22 +345,22 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar return error != null; } - private synchronized StingException getTraversalError() { + private synchronized RuntimeException getTraversalError() { if(!hasTraversalErrorOccurred()) throw new ReviewedStingException("User has attempted to retrieve a traversal error when none exists"); - - // If the error is already a StingException, pass it along as is. Otherwise, wrap it. - if(error instanceof StingException) - return (StingException)error; - else - return new ReviewedStingException("An error occurred during the traversal.",error); + return error; } /** * Allows other threads to notify of an error during traversal. */ protected synchronized void notifyOfTraversalError(Throwable error) { - this.error = error; + // If the error is already a Runtime, pass it along as is. Otherwise, wrap it. + if (error instanceof RuntimeException) + this.error = (RuntimeException)error; + else + this.error = new ReviewedStingException("An error occurred during the traversal.", error); + } diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java index f5dee4961..18bf16d71 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/MalformedBAMErrorReformatingIterator.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.iterators; -import net.sf.samtools.SAMFormatException; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -23,7 +22,7 @@ public class MalformedBAMErrorReformatingIterator implements CloseableIterator columnStarts = TextFormattingUtils.getWordStarts(columnLine); - String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts); - - if (primaryKeyDisplay) { - addPrimaryKey(columnNames[0]); - - } else { - sortByPrimaryKey = true; - addPrimaryKey("id", false); - counter = 1; + } + String[] tableData = tableHeaders[0].split(":"); + String[] userData = tableHeaders[1].split(":"); + + // Fill in the fields + tableName = userData[2]; + tableDescription = (userData.length <= 3) ? "" : userData[3]; // table may have no description! (and that's okay) + primaryKeyDisplay = Boolean.parseBoolean(tableData[2]); + columns = new GATKReportColumns(); + + int nColumns = Integer.parseInt(tableData[3]); + int nRows = Integer.parseInt(tableData[4]); + + + // Read column names + String columnLine; + try { + columnLine = reader.readLine(); + } catch (IOException e) { + throw new ReviewedStingException(COULD_NOT_READ_COLUMN_NAMES); + } + + List columnStarts = TextFormattingUtils.getWordStarts(columnLine); + String[] columnNames = TextFormattingUtils.splitFixedWidth(columnLine, columnStarts); + + if (primaryKeyDisplay) { + addPrimaryKey(columnNames[0]); + + } else { + sortByPrimaryKey = true; + addPrimaryKey("id", false); + counter = 1; + } + // Put in columns using the format string from the header + for (int i = 0; i < nColumns; i++) { + String format = tableData[5 + i]; + if (primaryKeyDisplay) + addColumn(columnNames[i + 1], true, format); + else + addColumn(columnNames[i], true, format); + } + + for (int i = 0; i < nRows; i++) { + // read line + String dataLine; + try { + dataLine = reader.readLine(); + } catch (IOException e) { + throw new ReviewedStingException(COULD_NOT_READ_DATA_LINE + e.getMessage()); } - // Put in columns using the format string from the header - for (int i = 0; i < nColumns; i++) { - String format = tableData[5 + i]; - if (primaryKeyDisplay) - addColumn(columnNames[i + 1], true, format); - else - addColumn(columnNames[i], true, format); - } - - for (int i = 0; i < nRows; i++) { - // read line - List lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(reader.readLine(), columnStarts)); - - for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) { - - //Input all the remaining values - GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType(); - - if (primaryKeyDisplay) { - String columnName = columnNames[columnIndex + 1]; - String primaryKey = lineSplits.get(0); - set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1))); - } else { - String columnName = columnNames[columnIndex]; - set(counter, columnName, type.Parse(lineSplits.get(columnIndex))); - } - + List lineSplits = Arrays.asList(TextFormattingUtils.splitFixedWidth(dataLine, columnStarts)); + + for (int columnIndex = 0; columnIndex < nColumns; columnIndex++) { + + //Input all the remaining values + GATKReportDataType type = getColumns().getByIndex(columnIndex).getDataType(); + + if (primaryKeyDisplay) { + String columnName = columnNames[columnIndex + 1]; + String primaryKey = lineSplits.get(0); + set(primaryKey, columnName, type.Parse(lineSplits.get(columnIndex + 1))); + } else { + String columnName = columnNames[columnIndex]; + set(counter, columnName, type.Parse(lineSplits.get(columnIndex))); } - counter++; + } - - + counter++; + } + + + try { reader.readLine(); - // When you see empty line or null, quit out - } - } catch (Exception e) { - //throw new StingException("Cannot read GATKReport: " + e); - e.printStackTrace(); + } catch (IOException e) { + throw new ReviewedStingException(COULD_NOT_READ_EMPTY_LINE + e.getMessage()); + } + break; + + default: + throw new ReviewedStingException(OLD_GATK_TABLE_VERSION); } } @@ -408,8 +431,7 @@ public class GATKReportTable { } catch (Exception e) { } } - if (column.getDataType().equals(GATKReportDataType.Byte) && - ((String) value).length() == 1) { + if (column.getDataType().equals(GATKReportDataType.Byte) && ((String) value).length() == 1) { newValue = ((String) value).charAt(0); } @@ -418,12 +440,11 @@ public class GATKReportTable { if (newValue != null) value = newValue; - if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || - column.getDataType().equals(GATKReportDataType.Unknown) ) + // todo -- Types have to be more flexible. For example, %d should accept Integers, Shorts and Bytes. + if (column.getDataType().equals(GATKReportDataType.fromObject(value)) || column.getDataType().equals(GATKReportDataType.Unknown) ) columns.get(columnName).put(primaryKey, value); else - throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", - GATKReportDataType.fromObject(value).name(), column.getDataType().name())); + throw new ReviewedStingException(String.format("Tried to add an object of type: %s to a column of type: %s", GATKReportDataType.fromObject(value).name(), column.getDataType().name())); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 753740258..4a0c7a6da 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -31,8 +31,10 @@ public class LowMQ extends InfoFieldAnnotation { double total = 0; for ( Map.Entry sample : stratifiedContexts.entrySet() ) { - ReadBackedPileup pileup = sample.getValue().getBasePileup(); - for (PileupElement p : pileup ) + if ( !sample.getValue().hasBasePileup() ) + continue; + + for ( PileupElement p : sample.getValue().getBasePileup() ) { if ( p.getMappingQual() == 0 ) { mq0 += 1; } if ( p.getMappingQual() <= 10 ) { mq10 += 1; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java index a30472ce8..8a9c626eb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java @@ -2,10 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.BitSetUtils; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.LinkedList; -import java.util.List; +import java.util.*; /** * This class provides all the functionality for the BitSet representation of the keys to the hash table of BQSR @@ -30,6 +27,7 @@ import java.util.List; public class BQSRKeyManager { private List requiredCovariates; private List optionalCovariates; + private Map covariateNameToIDMap; private int nRequiredBits; // Number of bits used to represent the required covariates private int nOptionalBits; // Number of bits used to represent the standard covaraites @@ -48,6 +46,7 @@ public class BQSRKeyManager { public BQSRKeyManager(List requiredCovariates, List optionalCovariates) { this.requiredCovariates = new ArrayList(requiredCovariates.size()); // initialize the required covariates list this.optionalCovariates = new ArrayList(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay) + this.covariateNameToIDMap = new HashMap(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates) nRequiredBits = 0; for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management @@ -57,14 +56,16 @@ public class BQSRKeyManager { nRequiredBits += nBits; } - short i = 0; + short id = 0; nOptionalBits = 0; for (Covariate optional : optionalCovariates) { int nBits = optional.numberOfBits(); // number of bits used by this covariate nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate - BitSet optionalID = BitSetUtils.bitSetFrom(i); // calculate the optional covariate ID for this covariate + BitSet optionalID = BitSetUtils.bitSetFrom(id); // calculate the optional covariate ID for this covariate this.optionalCovariates.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object - i++; + String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport + this.covariateNameToIDMap.put(covariateName, id); + id++; } nOptionalIDBits = BitSetUtils.numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID @@ -92,7 +93,7 @@ public class BQSRKeyManager { * @return one key in bitset representation per covariate */ public List bitSetsFromAllKeys(BitSet[] allKeys, EventType eventType) { - List allBitSets = new LinkedList(); // Generate one key per optional covariate + List allBitSets = new LinkedList(); // Generate one key per optional covariate BitSet eventBitSet = BitSetUtils.bitSetFrom(eventType.index); // create a bitset with the event type int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // Location in the bit set to add the event type bits @@ -147,7 +148,7 @@ public class BQSRKeyManager { if (optionalCovariates.size() > 0) { int optionalCovariate = requiredCovariates.size(); // the optional covariate index in the key array int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's - int covariateID = (Short) key[covariateIDIndex]; // get the optional covariate id + int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index OptionalCovariateInfo infoOptional = optionalCovariates.get(covariateID); // so we can get the optional covariate information BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface @@ -162,7 +163,17 @@ public class BQSRKeyManager { return bitSetKey; } - + + /** + * Covariate id can be either the covariate name (String) or the actual id (short). This method + * finds it's type and converts accordingly to the short notation. + * + * @param id the string or short representation of the optional covariate id + * @return the short representation of the optional covariate id. + */ + private short parseCovariateID(Object id) { + return (id instanceof String) ? covariateNameToIDMap.get(id.toString()) : (Short) id; + } /** * Generates a key set of objects from a combined bitset key. @@ -185,13 +196,27 @@ public class BQSRKeyManager { short id = BitSetUtils.shortFrom(idbs); // covert the id bitset into a short Covariate covariate = optionalCovariates.get(id).covariate; // get the corresponding optional covariate object objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set - objectKeys.add(id); // add the covariate id + objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id } objectKeys.add(eventFromBitSet(key)); // add the event type object to the key set return objectKeys; } + public List getRequiredCovariates() { + ArrayList list = new ArrayList(requiredCovariates.size()); + for (RequiredCovariateInfo info : requiredCovariates) + list.add(info.covariate); + return list; + } + + public List getOptionalCovariates() { + ArrayList list = new ArrayList(optionalCovariates.size()); + for (OptionalCovariateInfo info : optionalCovariates) + list.add(info.covariate); + return list; + } + /** * Translates a masked bitset into a bitset starting at 0 * @@ -253,7 +278,6 @@ public class BQSRKeyManager { return chopNBitsFrom(bitSet, leadingBits); } - /** * Aggregate information for each Covariate */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java index 3f3bc5040..7bc6cd754 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java @@ -188,7 +188,7 @@ public class CycleCovariate implements StandardCovariate { @Override public BitSet bitSetFromKey(Object key) { - return BitSetUtils.bitSetFrom((Short) key); + return (key instanceof String) ? BitSetUtils.bitSetFrom(Short.parseShort((String) key)) : BitSetUtils.bitSetFrom((Short) key); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java index cd2253e1a..4100eb8bb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java @@ -79,8 +79,8 @@ public class QualityScoreCovariate implements RequiredCovariate { } @Override - public BitSet bitSetFromKey(Object key) { - return BitSetUtils.bitSetFrom((Byte) key); + public BitSet bitSetFromKey(Object key) { + return (key instanceof String) ? BitSetUtils.bitSetFrom(Byte.parseByte((String) key)) : BitSetUtils.bitSetFrom((Byte) key); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java index ad4f94f33..eb20f7779 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.BitSetUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Arrays; import java.util.BitSet; import java.util.HashMap; +import java.util.regex.Pattern; /* * Copyright (c) 2009 The Broad Institute @@ -45,6 +47,10 @@ public class ReadGroupCovariate implements RequiredCovariate { private final HashMap readGroupLookupTable = new HashMap(); private final HashMap readGroupReverseLookupTable = new HashMap(); private short nextId = 0; + + private static final String LANE_TAG = "LN"; + private static final String SAMPLE_TAG = "SM"; + // Initialize any member variables using the command-line arguments passed to the walkers @Override @@ -54,14 +60,13 @@ public class ReadGroupCovariate implements RequiredCovariate { @Override public CovariateValues getValues(final GATKSAMRecord read) { final int l = read.getReadLength(); - final String readGroupId = read.getReadGroup().getReadGroupId(); - BitSet rg = bitSetForReadGroup(readGroupId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset + final String readGroupId = readGroupValueFromRG(read.getReadGroup()); + BitSet rg = bitSetForReadGroup(readGroupId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset BitSet[] readGroups = new BitSet[l]; Arrays.fill(readGroups, rg); return new CovariateValues(readGroups, readGroups, readGroups); } - // Used to get the covariate's value from input csv file during on-the-fly recalibration @Override public final Object getValue(final String str) { return str; @@ -77,15 +82,15 @@ public class ReadGroupCovariate implements RequiredCovariate { return bitSetForReadGroup((String) key); } - public final String decodeReadGroup(final short id) { - return readGroupReverseLookupTable.get(id); - } - @Override public int numberOfBits() { return BitSetUtils.numberOfBitsToRepresent(Short.MAX_VALUE); } - + + private String decodeReadGroup(final short id) { + return readGroupReverseLookupTable.get(id); + } + private BitSet bitSetForReadGroup(String readGroupId) { short shortId; if (readGroupLookupTable.containsKey(readGroupId)) @@ -98,6 +103,35 @@ public class ReadGroupCovariate implements RequiredCovariate { } return BitSetUtils.bitSetFrom(shortId); } + + /** + * Gather the sample and lane information from the read group record and return sample.lane + * + * If the bam file is missing the lane information, it tries to use the id regex standardized + * by the Broad Institute to extract the lane information + * + * If it fails to find either of the two pieces of information, will return the read group id instead. + * + * @param rg the read group record + * @return sample.lane or id if information is missing. + */ + private String readGroupValueFromRG(GATKSAMReadGroupRecord rg) { + String lane = rg.getLane(); // take the sample's lane from the read group lane tag + String sample = rg.getSample(); // take the sample's name from the read group sample tag + String value = rg.getId(); // initialize the return value with the read group ID in case we can't find the sample or the lane + + if (lane == null) { // if this bam doesn't have the lane annotation in the read group try to take it from the read group id + String [] splitID = rg.getId().split(Pattern.quote(".")); + if (splitID.length > 1) // if the id doesn't follow the BROAD defined regex (PU.LANE), fall back to the read group id + lane = splitID[splitID.length - 1]; // take the lane from the readgroup id + } + + if (sample != null && lane != null) + value = sample + "." + lane; // the read group covariate is sample.lane (where the inforamtion is available) + + return value; + } + } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java index 5d1adaf40..a2edd2806 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java @@ -26,10 +26,15 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import net.sf.samtools.SAMUtils; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.walkers.recalibration.EmpiricalQual; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.collections.NestedHashMap; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.AlignmentUtils; @@ -37,10 +42,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** * Created by IntelliJ IDEA. @@ -53,18 +55,28 @@ import java.util.Map; */ public class RecalDataManager { - public final NestedHashMap nestedHashMap; // The full dataset - private final HashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed - private final HashMap dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed - private final HashMap> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed + public final static String ARGUMENT_REPORT_TABLE_TITLE = "Arguments"; + public final static String QUANTIZED_REPORT_TABLE_TITLE = "Quantized"; + public final static String READGROUP_REPORT_TABLE_TITLE = "RecalTable0"; + public final static String QUALITY_SCORE_REPORT_TABLE_TITLE = "RecalTable1"; + public final static String ALL_COVARIATES_REPORT_TABLE_TITLE = "RecalTable2"; + + public final static String ARGUMENT_VALUE_COLUMN_NAME = "Value"; + public final static String QUANTIZED_VALUE_COLUMN_NAME = "QuantizedScore"; + public static final String QUANTIZED_COUNT_COLUMN_NAME = "Count"; + public final static String READGROUP_COLUMN_NAME = "ReadGroup"; + public final static String EVENT_TYPE_COLUMN_NAME = "EventType"; + public final static String EMPIRICAL_QUALITY_COLUMN_NAME = "EmpiricalQuality"; + public final static String ESTIMATED_Q_REPORTED_COLUMN_NAME = "EstimatedQReported"; + public final static String QUALITY_SCORE_COLUMN_NAME = "QualityScore"; + public final static String COVARIATE_VALUE_COLUMN_NAME = "CovariateValue"; + public final static String COVARIATE_NAME_COLUMN_NAME = "CovariateName"; - public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color private static boolean warnUserNullPlatform = false; - private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ public enum SOLID_RECAL_MODE { /** @@ -82,7 +94,20 @@ public class RecalDataManager { /** * Look at the color quality scores and probabilistically decide to change the reference inserted base to be the base which is implied by the original color space instead of the reference. */ - REMOVE_REF_BIAS + REMOVE_REF_BIAS; + + public static SOLID_RECAL_MODE recalModeFromString(String recalMode) { + if (recalMode.equals("DO_NOTHING")) + return SOLID_RECAL_MODE.DO_NOTHING; + if (recalMode.equals("SET_Q_ZERO")) + return SOLID_RECAL_MODE.SET_Q_ZERO; + if (recalMode.equals("SET_Q_ZERO_BASE_N")) + return SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N; + if (recalMode.equals("REMOVE_REF_BIAS")) + return SOLID_RECAL_MODE.REMOVE_REF_BIAS; + + throw new UserException.BadArgumentValue(recalMode, "is not a valid SOLID_RECAL_MODE value"); + } } public enum SOLID_NOCALL_STRATEGY { @@ -97,78 +122,125 @@ public class RecalDataManager { /** * Mark these reads as failing vendor quality checks so they can be filtered out by downstream analyses. */ - PURGE_READ - } + PURGE_READ; - public RecalDataManager() { - nestedHashMap = new NestedHashMap(); - dataCollapsedReadGroup = null; - dataCollapsedQualityScore = null; - dataCollapsedByCovariate = null; - } + public static SOLID_NOCALL_STRATEGY nocallStrategyFromString(String nocallStrategy) { + if (nocallStrategy.equals("THROW_EXCEPTION")) + return SOLID_NOCALL_STRATEGY.THROW_EXCEPTION; + if (nocallStrategy.equals("LEAVE_READ_UNRECALIBRATED")) + return SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED; + if (nocallStrategy.equals("PURGE_READ")) + return SOLID_NOCALL_STRATEGY.PURGE_READ; - public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) { - if (createCollapsedTables) { // Initialize all the collapsed tables, only used by on-the-fly recalibration - nestedHashMap = null; - dataCollapsedReadGroup = new HashMap(); - dataCollapsedQualityScore = new HashMap(); - dataCollapsedByCovariate = new HashMap>(); - for (final EventType errorModel : EventType.values()) { - dataCollapsedReadGroup.put(errorModel, new NestedHashMap()); - dataCollapsedQualityScore.put(errorModel, new NestedHashMap()); - dataCollapsedByCovariate.put(errorModel, new ArrayList()); - for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate - dataCollapsedByCovariate.get(errorModel).add(new NestedHashMap()); - } - } - } - else { - nestedHashMap = new NestedHashMap(); - dataCollapsedReadGroup = null; - dataCollapsedQualityScore = null; - dataCollapsedByCovariate = null; + throw new UserException.BadArgumentValue(nocallStrategy, "is not a valid SOLID_NOCALL_STRATEGY value"); } } - public static ReadCovariates covariateKeySetFrom(GATKSAMRecord read) { - return (ReadCovariates) read.getTemporaryAttribute(COVARS_ATTRIBUTE); - } + public static void listAvailableCovariates(Logger logger) { + // Get a list of all available covariates + final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); - - private void checkForSingletons(final Map data) { - // todo -- this looks like it's better just as a data.valueSet() call? - for (Object comp : data.keySet()) { - final Object val = data.get(comp); - if (val instanceof RecalDatum) { // We are at the end of the nested hash maps - if (data.keySet().size() == 1) { - data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done ... - // in a previous step of the sequential calculation model - } - } - else { // Another layer in the nested hash map - checkForSingletons((Map) val); - } - } + // Print and exit if that's what was requested + logger.info("Available covariates:"); + for (Class covClass : covariateClasses) + logger.info(covClass.getSimpleName()); + logger.info(""); } /** - * Get the appropriate collapsed table out of the set of all the tables held by this Object - * - * @param covariate Which covariate indexes the desired collapsed HashMap - * @return The desired collapsed HashMap + * Generates two lists : required covariates and optional covariates based on the user's requests. + * + * Performs the following tasks in order: + * 1. Adds all requierd covariates in order + * 2. Check if the user asked to use the standard covariates and adds them all if that's the case + * 3. Adds all covariates requested by the user that were not already added by the two previous steps + * + * @param argumentCollection the argument collection object for the recalibration walker + * @return a pair of ordered lists : required covariates (first) and optional covariates (second) */ - public final NestedHashMap getCollapsedTable(final int covariate, final EventType errorModel) { - if (covariate == 0) { - return dataCollapsedReadGroup.get(errorModel); // Table where everything except read group has been collapsed - } - else if (covariate == 1) { - return dataCollapsedQualityScore.get(errorModel); // Table where everything except read group and quality score has been collapsed - } - else { - return dataCollapsedByCovariate.get(errorModel).get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed + public static Pair, ArrayList> initializeCovariates(RecalibrationArgumentCollection argumentCollection) { + final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); + final List> requiredClasses = new PluginManager(RequiredCovariate.class).getPlugins(); + final List> standardClasses = new PluginManager(StandardCovariate.class).getPlugins(); + + ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates + ArrayList optionalCovariates = new ArrayList(); + if (argumentCollection.USE_STANDARD_COVARIATES) + optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user + + if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified + for (String requestedCovariateString : argumentCollection.COVARIATES) { + boolean foundClass = false; + for (Class covClass : covariateClasses) { + if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class + foundClass = true; + if (!requiredClasses.contains(covClass) && + (!argumentCollection.USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { + try { + final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it + optionalCovariates.add(covariate); + } catch (Exception e) { + throw new DynamicClassResolutionException(covClass, e); + } + } + } + } + + if (!foundClass) { + throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates."); + } + } } + return new Pair, ArrayList>(requiredCovariates, optionalCovariates); } + /** + * Initializes the recalibration table -> key manager map + * + * @param requiredCovariates list of required covariates (in order) + * @param optionalCovariates list of optional covariates (in order) + * @return a map with each key manager and it's corresponding recalibration table properly initialized + */ + public static LinkedHashMap> initializeTables(ArrayList requiredCovariates, ArrayList optionalCovariates) { + final LinkedHashMap> tablesAndKeysMap = new LinkedHashMap>(); + ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. + ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables + for (Covariate covariate : requiredCovariates) { + requiredCovariatesToAdd.add(covariate); + final Map recalTable = new HashMap(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively) + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager + tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map + } + final Map recalTable = new HashMap(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager + tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map + return tablesAndKeysMap; + } + + /** + * Initializes the table -> key manager map (unfortunate copy of the above code with minor modifications to accomodate the different return types (RecalDatum vs EmpiricalQual objects) + * + * @param requiredCovariates list of required covariates (in order) + * @param optionalCovariates list of optional covariates (in order) + * @return a map with each key manager and it's corresponding recalibration table properly initialized + */ + public static LinkedHashMap> initializeEmpiricalTables(ArrayList requiredCovariates, ArrayList optionalCovariates) { + final LinkedHashMap> tablesAndKeysMap = new LinkedHashMap>(); + ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. + ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables + for (Covariate covariate : requiredCovariates) { + requiredCovariatesToAdd.add(covariate); + final Map recalTable = new HashMap(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively) + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager + tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map + } + final Map recalTable = new HashMap(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager + tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map + return tablesAndKeysMap; + } + + /** * Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string * @@ -526,8 +598,9 @@ public class RecalDataManager { * * @param read The read for which to compute covariate values. * @param requestedCovariates The list of requested covariates. + * @return a matrix with all the covariates calculated for every base in the read */ - public static void computeCovariates(final GATKSAMRecord read, final List requestedCovariates) { + public static ReadCovariates computeCovariates(final GATKSAMRecord read, final List requestedCovariates) { final int numRequestedCovariates = requestedCovariates.size(); final int readLength = read.getReadLength(); final ReadCovariates readCovariates = new ReadCovariates(readLength, numRequestedCovariates); @@ -536,7 +609,7 @@ public class RecalDataManager { for (Covariate covariate : requestedCovariates) readCovariates.addCovariate(covariate.getValues(read)); - read.setTemporaryAttribute(COVARS_ATTRIBUTE, readCovariates); + return readCovariates; } /** @@ -613,4 +686,42 @@ public class RecalDataManager { return base; } } + + + /** + * Adds the required covariates to a covariate list + * + * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand. + * + * @param classes list of classes to add to the covariate list + * @return the covariate list + */ + private static ArrayList addRequiredCovariatesToList(List> classes) { + ArrayList dest = new ArrayList(classes.size()); + if (classes.size() != 2) + throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected"); + + dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next. + dest.add(new QualityScoreCovariate()); + return dest; + } + + /** + * Adds the standard covariates to a covariate list + * + * @param classes list of classes to add to the covariate list + * @return the covariate list + */ + private static ArrayList addStandardCovariatesToList(List> classes) { + ArrayList dest = new ArrayList(classes.size()); + for (Class covClass : classes) { + try { + final Covariate covariate = (Covariate) covClass.newInstance(); + dest.add(covariate); + } catch (Exception e) { + throw new DynamicClassResolutionException(covClass, e); + } + } + return dest; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java index 91f865180..b7f88c524 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java @@ -87,6 +87,10 @@ public class RecalDatum extends RecalDatumOptimized { public final void calcCombinedEmpiricalQuality(final int smoothing, final int maxQual) { this.empiricalQuality = empiricalQualDouble(smoothing, maxQual); // cache the value so we don't call log over and over again } + + public final void calcEstimatedReportedQuality() { + this.estimatedQReported = -10 * Math.log10(calcExpectedErrors() / (double) numObservations); + } //--------------------------------------------------------------------------------------------------------------- // diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 40f28f644..a33ba8bd0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -50,7 +50,7 @@ public class RecalibrationArgumentCollection { * Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument. */ @Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false) - protected List> knownSites = Collections.emptyList(); + public List> knownSites = Collections.emptyList(); /** * After the header, data records occur one per line until the end of the file. The first several items on a line are the @@ -60,25 +60,25 @@ public class RecalibrationArgumentCollection { */ @Gather(BQSRGatherer.class) @Output - protected PrintStream RECAL_FILE; + public PrintStream RECAL_FILE; /** * List all implemented covariates. */ @Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false) - protected boolean LIST_ONLY = false; + public boolean LIST_ONLY = false; /** * Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you. See the list of covariates with -list. */ @Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false) - protected String[] COVARIATES = null; + public String[] COVARIATES = null; /* * Use the standard set of covariates in addition to the ones listed using the -cov argument */ @Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false) - protected boolean USE_STANDARD_COVARIATES = true; + public boolean USE_STANDARD_COVARIATES = true; ///////////////////////////// // Debugging-only Arguments @@ -88,7 +88,7 @@ public class RecalibrationArgumentCollection { */ @Hidden @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.") - protected boolean RUN_WITHOUT_DBSNP = false; + public boolean RUN_WITHOUT_DBSNP = false; /** * CountCovariates and TableRecalibration accept a --solid_recal_mode flag which governs how the recalibrator handles the @@ -152,6 +152,9 @@ public class RecalibrationArgumentCollection { @Hidden @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.") public String FORCE_PLATFORM = null; + @Hidden + @Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output") + public int QUANTIZING_LEVELS = 16; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 1b73ef1d7..21f11d2ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -240,6 +240,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood refAllele = Allele.create(refBases, true); altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); } + else continue; // don't go on with this allele if refBases are non-standard } else { // insertion case if (Allele.acceptableAlleleBases(s)) { @@ -247,6 +248,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood altAllele = Allele.create(s, false); stop = loc.getStart(); } + else continue; // go on to next allele if consensus insertion has any non-standard base. } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 9470ce2f4..dc5dfc907 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -36,8 +36,10 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; @@ -121,8 +123,14 @@ public class ReadBackedPhasingWalker extends RodWalker samplesToPhase = null; + protected Set samplesToPhase = null; + + @Hidden + @Argument(fullName = "permitNoSampleOverlap", shortName = "permitNoSampleOverlap", doc = "Don't exit (just WARN) when the VCF and BAMs do not overlap in samples", required = false) + private boolean permitNoSampleOverlap = false; + + @Argument(fullName = "respectPhaseInInput", shortName = "respectPhaseInInput", doc = "Will only phase genotypes in cases where the resulting output will necessarily be consistent with any existing phase (for example, from trios)", required = false) + private boolean respectPhaseInInput = false; private GenomeLoc mostDownstreamLocusReached = null; @@ -205,8 +213,18 @@ public class ReadBackedPhasingWalker extends RodWalker rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); - Set samples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase); - writer.writeHeader(new VCFHeader(hInfo, samples)); + Set vcfSamples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase); + writer.writeHeader(new VCFHeader(hInfo, vcfSamples)); + + Set readSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + readSamples.retainAll(vcfSamples); + if (readSamples.isEmpty()) { + String noPhaseString = "No common samples in VCF and BAM headers" + (samplesToPhase == null ? "" : " (limited to sampleToPhase parameters)") + ", so nothing could possibly be phased!"; + if (permitNoSampleOverlap) + logger.warn(noPhaseString); + else + throw new UserException(noPhaseString); + } } public boolean generateExtendedEvents() { @@ -472,6 +490,13 @@ public class ReadBackedPhasingWalker extends RodWalker readsAtHetSites = null; + private void clearFields() { + hetGenotypes = null; + prevHetAndInteriorIt = null; + phasingSiteIndex = -1; + readsAtHetSites = null; + } + public boolean hasPreviousHets() { return phasingSiteIndex > 0; } @@ -498,12 +523,20 @@ public class ReadBackedPhasingWalker extends RodWalker implements TreeReducible { + @Input(fullName="exception", shortName = "E", doc="Java class of exception to throw", required=true) + public String exceptionToThrow; + + // + // Template code to allow us to build the walker, doesn't actually do anything + // + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( exceptionToThrow.equals("UserException") ) { + throw new UserException("UserException"); + } else if ( exceptionToThrow.equals("NullPointerException") ) { + throw new NullPointerException(); + } else if ( exceptionToThrow.equals("ReviewedStingException") ) { + throw new ReviewedStingException("ReviewedStingException"); + } else { + throw new UserException.BadArgumentValue("exception", "exception isn't a recognized value " + exceptionToThrow); + } + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + public Integer treeReduce(final Integer lhs, final Integer rhs) { + return lhs + rhs; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/EmpiricalQual.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/EmpiricalQual.java new file mode 100755 index 000000000..e9bfa3513 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/EmpiricalQual.java @@ -0,0 +1,55 @@ +package org.broadinstitute.sting.gatk.walkers.recalibration; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Created by IntelliJ IDEA. + * User: carneiro + * Date: Mar 22, 2012 + * + * Object that holds the empirical quality and estimated reported quality values for on-the-fly recalibration. This is a simplification of the RecalDatum object + */ + +public class EmpiricalQual { + + private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations + private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example) + + private EmpiricalQual() {} + + public EmpiricalQual(final double estimatedQReported, final double empiricalQuality) { + this.estimatedQReported = estimatedQReported; + this.empiricalQuality = empiricalQuality; + } + + public final double getEstimatedQReported() { + return estimatedQReported; + } + + public final double getEmpiricalQuality() { + return empiricalQuality; + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index d18c7e10a..2b9f159ac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -93,6 +93,7 @@ import java.util.*; */ @Reference(window=@Window(start=-50, stop=50)) public class VariantEvalWalker extends RodWalker implements TreeReducible { + public static final String IS_SINGLETON_KEY = "ISSINGLETON"; @Output protected PrintStream out; @@ -213,6 +214,9 @@ public class VariantEvalWalker extends RodWalker implements Tr // Public constants private static String ALL_SAMPLE_NAME = "all"; + // the number of processed bp for this walker + long nProcessedLoci = 0; + // Utility class private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this); @@ -325,10 +329,10 @@ public class VariantEvalWalker extends RodWalker implements Tr */ @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - for ( NewEvaluationContext nec : evaluationContexts.values() ) { - synchronized (nec) { - nec.update0(tracker, ref, context); - } + // we track the processed bp and expose this for modules instead of wasting CPU power on calculating + // the same thing over and over in evals that want the processed bp + synchronized (this) { + nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); } if (tracker != null) { @@ -454,7 +458,7 @@ public class VariantEvalWalker extends RodWalker implements Tr if ( lenientMatch == null ) lenientMatch = comp; break; case NO_MATCH: - ; + // do nothing } } @@ -494,7 +498,7 @@ public class VariantEvalWalker extends RodWalker implements Tr if (field.get(ve) instanceof TableType) { TableType t = (TableType) field.get(ve); - String subTableName = ve.getClass().getSimpleName() + "." + field.getName(); + final String subTableName = ve.getClass().getSimpleName() + "." + field.getName(); final DataPoint dataPointAnn = datamap.get(field); GATKReportTable table; @@ -509,17 +513,10 @@ public class VariantEvalWalker extends RodWalker implements Tr table.addColumn(vs.getName(), "unknown"); } - table.addColumn("row", "unknown"); - - for ( Object o : t.getColumnKeys() ) { - String c; - - if (o instanceof String) { - c = (String) o; - } else { - c = o.toString(); - } + table.addColumn(t.getRowName(), "unknown"); + for ( final Object o : t.getColumnKeys() ) { + final String c = o.toString(); table.addColumn(c, 0.0); } } else { @@ -527,7 +524,7 @@ public class VariantEvalWalker extends RodWalker implements Tr } for (int row = 0; row < t.getRowKeys().length; row++) { - String r = (String) t.getRowKeys()[row]; + final String r = t.getRowKeys()[row].toString(); for ( VariantStratifier vs : stratificationObjects ) { final String columnName = vs.getName(); @@ -535,17 +532,10 @@ public class VariantEvalWalker extends RodWalker implements Tr } for (int col = 0; col < t.getColumnKeys().length; col++) { - String c; - if (t.getColumnKeys()[col] instanceof String) { - c = (String) t.getColumnKeys()[col]; - } else { - c = t.getColumnKeys()[col].toString(); - } - - String newStateKey = stateKey.toString() + r; + final String c = t.getColumnKeys()[col].toString(); + final String newStateKey = stateKey.toString() + r; table.set(newStateKey, c, t.getCell(row, col)); - - table.set(newStateKey, "row", r); + table.set(newStateKey, t.getRowName(), r); } } } else { @@ -594,6 +584,10 @@ public class VariantEvalWalker extends RodWalker implements Tr public Set getJexlExpressions() { return jexlExpressions; } + public long getnProcessedLoci() { + return nProcessedLoci; + } + public Set getContigNames() { final TreeSet contigs = new TreeSet(); for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index 9a97b005c..3a2635121 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -30,7 +30,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval { @DataPoint(description = "Number of variants per base", format = "%.8f") public double variantRatePerBp = 0; - @DataPoint(description = "Number of snp loci", format = "%d") public long nSNPs = 0; @DataPoint(description = "Number of mnp loci", format = "%d") @@ -47,7 +46,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval { @DataPoint(description = "Number of mixed loci (loci that can't be classified as a SNP, Indel or MNP)", format = "%d") public long nMixed = 0; - @DataPoint(description = "Number of no calls loci", format = "%d") public long nNoCalls = 0; @DataPoint(description = "Number of het loci", format = "%d") @@ -72,8 +70,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval { public double indelRate = 0; @DataPoint(description = "indel rate per base pair", format = "%.2f") public double indelRatePerBp = 0; - @DataPoint(description = "deletion to insertion ratio", format = "%.2f") - public double deletionInsertionRatio = 0; + @DataPoint(description = "insertion to deletion ratio", format = "%.2f") + public double insertionDeletionRatio = 0; private double perLocusRate(long n) { return rate(n, nProcessedLoci); @@ -91,10 +89,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval { return 1; // we only need to see each eval track } - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); - } - public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { nCalledLoci++; @@ -113,12 +107,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval { case SNP: nVariantLoci++; nSNPs++; - if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++; + if (variantWasSingleton(vc1)) nSingletons++; break; case MNP: nVariantLoci++; nMNPs++; - if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++; + if (variantWasSingleton(vc1)) nSingletons++; break; case INDEL: nVariantLoci++; @@ -194,6 +188,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval { } public void finalizeEvaluation() { + nProcessedLoci = getWalker().getnProcessedLoci(); variantRate = perLocusRate(nVariantLoci); variantRatePerBp = perLocusRInverseRate(nVariantLoci); heterozygosity = perLocusRate(nHets); @@ -201,6 +196,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval { hetHomRatio = ratio(nHets, nHomVar); indelRate = perLocusRate(nDeletions + nInsertions + nComplex); indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions + nComplex); - deletionInsertionRatio = ratio(nDeletions, nInsertions); + insertionDeletionRatio = ratio(nInsertions, nDeletions); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java index 4f5aeed61..75aacf5ba 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java @@ -59,7 +59,7 @@ public class GenotypeConcordance extends VariantEvaluator { private boolean discordantInteresting = false; - static class FrequencyStats implements TableType { + static class FrequencyStats extends TableType { class Stats { public Stats(int found, int missed) { nFound = found; nMissed = missed; } public long nFound = 0; @@ -103,7 +103,7 @@ public class GenotypeConcordance extends VariantEvaluator { } } - static class QualityScoreHistograms implements TableType { + static class QualityScoreHistograms extends TableType { final static int NUM_BINS = 20; final HashMap truePositiveQualityScoreMap = new HashMap(); // A HashMap holds all the quality scores until we are able to bin them appropriately final HashMap falsePositiveQualityScoreMap = new HashMap(); @@ -362,7 +362,7 @@ public class GenotypeConcordance extends VariantEvaluator { /** * a table of sample names to genotype concordance figures */ -class SampleStats implements TableType { +class SampleStats extends TableType { private final int nGenotypeTypes; // sample to concordance stats object @@ -448,7 +448,7 @@ class SampleStats implements TableType { /** * a table of sample names to genotype concordance summary statistics */ -class SampleSummaryStats implements TableType { +class SampleSummaryStats extends TableType { protected final static String ALL_SAMPLES_KEY = "allSamples"; protected final static String[] COLUMN_KEYS = new String[]{ "percent_comp_ref_called_ref", diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java index f4369401b..41979798e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java @@ -60,6 +60,7 @@ public class GenotypePhasingEvaluator extends VariantEvaluator { double minPhaseQuality = 10.0; public void initialize(VariantEvalWalker walker) { + super.initialize(walker); this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality()); this.samplePrevGenotypes = new SamplePreviousGenotypes(); } @@ -294,14 +295,6 @@ class CompEvalGenotypes { public Genotype getEvalGenotype() { return evalGt; } - - public void setCompGenotype(Genotype compGt) { - this.compGt = compGt; - } - - public void setEvalGenotype(Genotype evalGt) { - this.evalGt = evalGt; - } } class SamplePreviousGenotypes { @@ -376,7 +369,7 @@ class PhaseStats { /** * a table of sample names to genotype phasing statistics */ -class SamplePhasingStatistics implements TableType { +class SamplePhasingStatistics extends TableType { private HashMap sampleStats = null; private double minPhaseQuality; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java deleted file mode 100755 index 6cf8b7c2c..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java +++ /dev/null @@ -1,111 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -/** - * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl - * - * @Author chartl - * @Date May 26, 2010 - */ -@Analysis(name = "Indel length histograms", description = "Shows the distribution of insertion/deletion event lengths (negative for deletion, positive for insertion)") -public class IndelLengthHistogram extends VariantEvaluator { - private static final int SIZE_LIMIT = 100; - @DataPoint(description="Histogram of indel lengths") - IndelHistogram indelHistogram = new IndelHistogram(SIZE_LIMIT); - - /* - * Indel length histogram table object - */ - - static class IndelHistogram implements TableType { - private Integer[] colKeys; - private int limit; - private String[] rowKeys = {"EventLength"}; - private Integer[] indelHistogram; - - public IndelHistogram(int limit) { - colKeys = initColKeys(limit); - indelHistogram = initHistogram(limit); - this.limit = limit; - } - - public Object[] getColumnKeys() { - return colKeys; - } - - public Object[] getRowKeys() { - return rowKeys; - } - - public Object getCell(int row, int col) { - return indelHistogram[col]; - } - - private Integer[] initColKeys(int size) { - Integer[] cK = new Integer[size*2+1]; - int index = 0; - for ( int i = -size; i <= size; i ++ ) { - cK[index] = i; - index++; - } - - return cK; - } - - private Integer[] initHistogram(int size) { - Integer[] hist = new Integer[size*2+1]; - for ( int i = 0; i < 2*size+1; i ++ ) { - hist[i] = 0; - } - - return hist; - } - - public String getName() { return "indelHistTable"; } - - public void update(int eLength) { - indelHistogram[len2index(eLength)]++; - } - - private int len2index(int len) { - if ( len > limit || len < -limit ) { - throw new ReviewedStingException("Indel length exceeds limit of "+limit+" please increase indel limit size"); - } - return len + limit; - } - } - - public boolean enabled() { return true; } - - public String getName() { return "IndelLengthHistogram"; } - - public int getComparisonOrder() { return 1; } // need only the evals - - public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - - if ( vc1.isIndel() && vc1.isPolymorphicInSamples() ) { - - if ( ! vc1.isBiallelic() ) { - //veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored."); - return vc1.toString(); // biallelic sites are output - } - - // only count simple insertions/deletions, not complex indels - if ( vc1.isSimpleInsertion() ) { - indelHistogram.update(vc1.getAlternateAllele(0).length()); - } else if ( vc1.isSimpleDeletion() ) { - indelHistogram.update(-vc1.getReference().length()); - } - } - - return null; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java deleted file mode 100755 index 87b453ae3..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java +++ /dev/null @@ -1,295 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.IndelUtils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.ArrayList; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -@Analysis(name = "IndelStatistics", description = "Shows various indel metrics and statistics") -public class IndelStatistics extends VariantEvaluator { - @DataPoint(description = "Indel Statistics") - IndelStats indelStats = null; - - // @DataPoint(description = "Indel Classification") - IndelClasses indelClasses = null; - - int numSamples = 0; - - public void initialize(VariantEvalWalker walker) { - numSamples = walker.getNumSamples(); - } - - private static final int INDEL_SIZE_LIMIT = 100; - private static final int IND_HET = 0; - private static final int IND_INS = 1; - private static final int IND_DEL = 2; - private static final int IND_COMPLEX = 3; - private static final int IND_HET_INS = 4; - private static final int IND_HOM_INS = 5; - private static final int IND_HET_DEL = 6; - private static final int IND_HOM_DEL = 7; - private static final int IND_HOM_REF = 8; - private static final int IND_MIXED = 9; - private static final int IND_LONG = 10; - private static final int IND_AT_EXP = 11; - private static final int IND_CG_EXP = 12; - private static final int IND_FRAMESHIFT = 13; - private static final int NUM_SCALAR_COLUMNS = 14; - - static int len2Index(int ind) { - return ind+INDEL_SIZE_LIMIT+NUM_SCALAR_COLUMNS; - } - - static int index2len(int ind) { - return ind-INDEL_SIZE_LIMIT-NUM_SCALAR_COLUMNS; - } - - static class IndelStats implements TableType { - protected final static String[] COLUMN_KEYS; - - static { - COLUMN_KEYS= new String[NUM_SCALAR_COLUMNS+2*INDEL_SIZE_LIMIT+1]; - COLUMN_KEYS[0] = "heterozygosity"; - COLUMN_KEYS[1] = "insertions"; - COLUMN_KEYS[2] = "deletions"; - COLUMN_KEYS[3] = "complex"; - COLUMN_KEYS[4] = "het_insertions"; - COLUMN_KEYS[5] = "homozygous_insertions"; - COLUMN_KEYS[6] = "het_deletions"; - COLUMN_KEYS[7] = "homozygous_deletions"; - COLUMN_KEYS[8] = "homozygous_reference_sites"; - COLUMN_KEYS[9] = "complex_events"; - COLUMN_KEYS[10] = "long_indels"; - COLUMN_KEYS[11] = "AT_expansions"; - COLUMN_KEYS[12] = "CG_expansions"; - COLUMN_KEYS[13] = "frameshift_indels"; - - for (int k=NUM_SCALAR_COLUMNS; k < NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT+1; k++) - COLUMN_KEYS[k] = "indel_size_len"+Integer.valueOf(index2len(k)); - } - - // map of sample to statistics - protected final int[] indelSummary; - - public IndelStats(final VariantContext vc) { - indelSummary = new int[COLUMN_KEYS.length]; - } - - /** - * - * @return one row per sample - */ - public Object[] getRowKeys() { - return new String[]{"all"}; - } - public Object getCell(int x, int y) { - return String.format("%d",indelSummary[y]); - } - - /** - * get the column keys - * @return a list of objects, in this case strings, that are the column names - */ - public Object[] getColumnKeys() { - return COLUMN_KEYS; - } - - public String getName() { - return "IndelStats"; - } - - public int getComparisonOrder() { - return 1; // we only need to see each eval track - } - - public String toString() { - return getName(); - } - - /* - * increment the specified value - */ - public void incrValue(VariantContext vc, ReferenceContext ref) { - int eventLength = 0; - boolean isInsertion = false, isDeletion = false; - - if ( vc.isSimpleInsertion() ) { - eventLength = vc.getAlternateAllele(0).length(); - indelSummary[IND_INS]++; - isInsertion = true; - } else if ( vc.isSimpleDeletion() ) { - indelSummary[IND_DEL]++; - eventLength = -vc.getReference().length(); - isDeletion = true; - } - else if (vc.isComplexIndel()) { - indelSummary[IND_COMPLEX]++; - } - else if (vc.isMixed()) - indelSummary[IND_MIXED]++; - - if (IndelUtils.isATExpansion(vc,ref)) - indelSummary[IND_AT_EXP]++; - if (IndelUtils.isCGExpansion(vc,ref)) - indelSummary[IND_CG_EXP]++; - - // make sure event doesn't overstep array boundaries - if (vc.isSimpleDeletion() || vc.isSimpleInsertion()) { - if (Math.abs(eventLength) < INDEL_SIZE_LIMIT) { - indelSummary[len2Index(eventLength)]++; - if (eventLength % 3 != 0) - indelSummary[IND_FRAMESHIFT]++; - } - else - indelSummary[IND_LONG]++; - } - - } - } - - static class IndelClasses implements TableType { - protected final static String[] columnNames = IndelUtils.getIndelClassificationNames(); - - - // map of sample to statistics - protected final int[] indelClassSummary; - - public IndelClasses(final VariantContext vc) { - indelClassSummary = new int[columnNames.length]; - } - - /** - * - * @return one row per sample - */ - public Object[] getRowKeys() { - return new String[]{"all"}; - } - public Object getCell(int x, int y) { - return String.format("%d",indelClassSummary[y]); - } - - /** - * get the column keys - * @return a list of objects, in this case strings, that are the column names - */ - public Object[] getColumnKeys() { - return columnNames; - } - - public String getName() { - return "IndelClasses"; - } - - public int getComparisonOrder() { - return 1; // we only need to see each eval track - } - - public String toString() { - return getName(); - } - - private void incrementSampleStat(VariantContext vc, int index) { - indelClassSummary[index]++; - } - /* - * increment the specified value - */ - public void incrValue(VariantContext vc, ReferenceContext ref) { - - - ArrayList indices = IndelUtils.findEventClassificationIndex(vc,ref); - //System.out.format("pos:%d \nREF: %s, ALT: %s\n",vc.getStart(), vc.getReference().getDisplayString(), - // vc.getAlternateAllele(0).getDisplayString()); - - byte[] refBases = ref.getBases(); - //System.out.format("ref bef:%s\n",new String(Arrays.copyOfRange(refBases,0,refBases.length/2+1) )); - //System.out.format("ref aft:%s\n",new String(Arrays.copyOfRange(refBases,refBases.length/2+1,refBases.length) )); - for (int index: indices) { - incrementSampleStat(vc, index); - // System.out.println(IndelUtils.getIndelClassificationName(index)); - } - } - - } - - //public IndelStatistics(VariantEvalWalker parent) { - //super(parent); - // don't do anything - //} - - public String getName() { - return "IndelStatistics"; - } - - public int getComparisonOrder() { - return 1; // we only need to see each eval track - } - - public boolean enabled() { - return true; - } - - public String toString() { - return getName(); - } - - public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - - if (eval != null && eval.isPolymorphicInSamples()) { - if ( indelStats == null ) { - indelStats = new IndelStats(eval); - } - if ( indelClasses == null ) { - indelClasses = new IndelClasses(eval); - } - - if ( eval.isIndel() || eval.isMixed() ) { - if (indelStats != null ) - indelStats.incrValue(eval, ref); - - if (indelClasses != null) - indelClasses.incrValue(eval, ref); - } - } - - return null; // This module doesn't capture any interesting sites, so return null - } - - public void finalizeEvaluation() { - int k=0; - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java new file mode 100644 index 000000000..51cf2bb6a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.IndelHistogram; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +@Analysis(description = "Evaluation summary for indels") +public class IndelSummary extends VariantEvaluator implements StandardEval { + final protected static Logger logger = Logger.getLogger(IndelSummary.class); + + @DataPoint(description = "Number of SNPs", format = "%d") + public int n_SNPs = 0; + + @DataPoint(description = "Number of singleton SNPs", format = "%d") + public int n_singleton_SNPs = 0; + + @DataPoint(description = "Number of Indels", format = "%d") + public int n_indels = 0; + + // Number of Indels Sites (counts one for any number of alleles at site) + public int nIndelSites = 0; + + @DataPoint(description = "Number of singleton Indels", format = "%d") + public int n_singleton_indels = 0; + + // counts 1 for each site where the number of alleles > 2 + public int nMultiIndelSites = 0; + + @DataPoint(description = "Percent of indel sites that are multi-allelic") + public String percent_of_sites_with_more_than_2_alleles; + + @DataPoint(description = "SNP to indel ratio") + public String SNP_to_indel_ratio; + + @DataPoint(description = "Singleton SNP to indel ratio") + public String SNP_to_indel_ratio_for_singletons; + + @DataPoint(description = "Indel novelty rate") + public String indel_novelty_rate; + + @DataPoint(description = "1 to 2 bp indel ratio") + public String ratio_of_1_to_2_bp_indels; + + @DataPoint(description = "1 to 3 bp indel ratio") + public String ratio_of_1_to_3_bp_indels; + + @DataPoint(description = "2 to 3 bp indel ratio") + public String ratio_of_2_to_3_bp_indels; + + @DataPoint(description = "1 and 2 to 3 bp indel ratio") + public String ratio_of_1_and_2_to_3_bp_indels; + + @DataPoint(description = "Frameshift percent") + public String frameshift_rate_for_coding_indels; + + // + // insertions to deletions + // + @DataPoint(description = "Insertion to deletion ratio") + public String insertion_to_deletion_ratio; + + @DataPoint(description = "Insertion to deletion ratio for 1 bp events") + public String insertion_to_deletion_ratio_for_1bp_indels; + + // + // Frameshifts + // + @DataPoint(description = "Number of indels in protein-coding regions labeled as frameshift") + public int n_coding_indels_frameshifting = 0; + + @DataPoint(description = "Number of indels in protein-coding regions not labeled as frameshift") + public int n_coding_indels_in_frame = 0; + + // + // Het : hom ratios + // + @DataPoint(description = "Het to hom ratio for SNPs") + public String SNP_het_to_hom_ratio; + + @DataPoint(description = "Het to hom ratio for indels") + public String indel_het_to_hom_ratio; + + int nSNPHets = 0, nSNPHoms = 0, nIndelHets = 0, nIndelHoms = 0; + + int nKnownIndels = 0, nInsertions = 0; + int n1bpInsertions = 0, n1bpDeletions = 0; + int[] countByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used + + public final static int MAX_SIZE_FOR_HISTOGRAM = 10; + @DataPoint(description = "Histogram of indel lengths") + IndelHistogram lengthHistogram = new IndelHistogram(MAX_SIZE_FOR_HISTOGRAM, true); + + @DataPoint(description = "Number of large (>10 bp) deletions") + public int n_large_deletions = 0; + + @DataPoint(description = "Number of large (>10 bp) insertions") + public int n_large_insertions = 0; + + @DataPoint(description = "Ratio of large (>10 bp) insertions to deletions") + public String insertion_to_deletion_ratio_for_large_indels; + + @Override public boolean enabled() { return true; } + @Override public int getComparisonOrder() { return 2; } + + public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( eval == null || eval.isMonomorphicInSamples() ) + return null; + + // update counts + switch ( eval.getType() ) { + case SNP: + n_SNPs += eval.getNAlleles() - 1; // -1 for ref + if ( variantWasSingleton(eval) ) n_singleton_SNPs++; + + // collect information about het / hom ratio + for ( final Genotype g : eval.getGenotypes() ) { + if ( g.isHet() ) nSNPHets++; + if ( g.isHomVar() ) nSNPHoms++; + } + break; + case INDEL: + if ( eval.isComplexIndel() ) break; // don't count complex substitutions + + nIndelSites++; + if ( ! eval.isBiallelic() ) nMultiIndelSites++; + if ( variantWasSingleton(eval) ) n_singleton_indels++; + + // collect information about het / hom ratio + for ( final Genotype g : eval.getGenotypes() ) { + if ( g.isHet() ) nIndelHets++; + if ( g.isHomVar() ) nIndelHoms++; + } + + for ( Allele alt : eval.getAlternateAlleles() ) { + n_indels++; // +1 for each alt allele + + if ( comp != null ) nKnownIndels++; // TODO -- make this test allele specific? + + // ins : del ratios + final int alleleSize = alt.length() - eval.getReference().length(); + if ( alleleSize == 0 ) throw new ReviewedStingException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference()); + if ( alleleSize > 0 ) nInsertions++; + if ( alleleSize == 1 ) n1bpInsertions++; + if ( alleleSize == -1 ) n1bpDeletions++; + + // update the length histogram + lengthHistogram.update(eval.getReference(), alt); + + // requires snpEFF annotations + if ( eval.getAttributeAsString("SNPEFF_GENE_BIOTYPE", "missing").equals("protein_coding") ) { + final String effect = eval.getAttributeAsString("SNPEFF_EFFECT", "missing"); + if ( effect.equals("missing") ) + throw new ReviewedStingException("Saw SNPEFF_GENE_BIOTYPE but unexpected no SNPEFF_EFFECT at " + eval); + if ( effect.equals("FRAME_SHIFT") ) + n_coding_indels_frameshifting++; + else if ( effect.startsWith("CODON") ) + n_coding_indels_in_frame++; + else + ; // lots of protein coding effects that shouldn't be counted, such as INTRON + } + + // update the baby histogram + final int absSize = Math.abs(alleleSize); + if ( absSize < countByLength.length ) countByLength[absSize]++; + } + + break; + default: + throw new UserException.BadInput("Unexpected variant context type: " + eval); + } + + return null; // we don't capture any interesting sites + } + + public void finalizeEvaluation() { + percent_of_sites_with_more_than_2_alleles = formattedRatio(nMultiIndelSites, nIndelSites); + SNP_to_indel_ratio = formattedRatio(n_SNPs, n_indels); + SNP_to_indel_ratio_for_singletons = formattedRatio(n_singleton_SNPs, n_singleton_indels); + indel_novelty_rate = formattedNoveltyRate(nKnownIndels, n_indels); + ratio_of_1_to_2_bp_indels = formattedRatio(countByLength[1], countByLength[2]); + ratio_of_1_to_3_bp_indels = formattedRatio(countByLength[1], countByLength[3]); + ratio_of_2_to_3_bp_indels = formattedRatio(countByLength[2], countByLength[3]); + ratio_of_1_and_2_to_3_bp_indels = formattedRatio(countByLength[1] + countByLength[2], countByLength[3]); + frameshift_rate_for_coding_indels = formattedPercent(n_coding_indels_frameshifting, n_coding_indels_in_frame + n_coding_indels_frameshifting); + + SNP_het_to_hom_ratio = formattedRatio(nSNPHets, nSNPHoms); + indel_het_to_hom_ratio = formattedRatio(nIndelHets, nIndelHoms); + + n_large_deletions = lengthHistogram.getnTooBigDeletions(); + n_large_insertions = lengthHistogram.getnTooBigInsertions(); + + insertion_to_deletion_ratio = formattedRatio(nInsertions, n_indels - nInsertions); + insertion_to_deletion_ratio_for_1bp_indels = formattedRatio(n1bpInsertions, n1bpDeletions); + insertion_to_deletion_ratio_for_large_indels = formattedRatio(n_large_insertions, n_large_deletions); + + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index 7f3bf6290..db2bf61c6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.PrintStream; import java.util.Map; import java.util.Set; @@ -60,17 +59,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator { @DataPoint(description = "Number of mendelian violations found", format = "%d") long nViolations; - - /*@DataPoint(description = "number of child hom ref calls where the parent was hom variant", format = "%d") - long KidHomRef_ParentHomVar; - @DataPoint(description = "number of child het calls where the parent was hom ref", format = "%d") - long KidHet_ParentsHomRef; - @DataPoint(description = "number of child het calls where the parent was hom variant", format = "%d") - long KidHet_ParentsHomVar; - @DataPoint(description = "number of child hom variant calls where the parent was hom ref", format = "%d") - long KidHomVar_ParentHomRef; - */ - @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR", format = "%d") long mvRefRef_Var; @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET", format = "%d") @@ -88,12 +76,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator { @DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET", format = "%d") long mvVarVar_Het; - - /*@DataPoint(description ="Number of inherited var alleles from het parents", format = "%d") - long nInheritedVar; - @DataPoint(description ="Number of inherited ref alleles from het parents", format = "%d") - long nInheritedRef;*/ - @DataPoint(description="Number of HomRef/HomRef/HomRef trios", format = "%d") long HomRefHomRef_HomRef; @DataPoint(description="Number of Het/Het/Het trios", format = "%d") @@ -120,18 +102,15 @@ public class MendelianViolationEvaluator extends VariantEvaluator { long HomVarHet_inheritedVar; MendelianViolation mv; - PrintStream mvFile; Map> families; public void initialize(VariantEvalWalker walker) { - //Changed by Laurent Francioli - 2011-06-07 - //mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold()); + super.initialize(walker); mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false); families = walker.getSampleDB().getFamilies(); } public boolean enabled() { - //return getVEWalker().FAMILY_STRUCTURE != null; return true; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicAFs.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicAFs.java deleted file mode 100644 index 7ed179c32..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicAFs.java +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; - -import java.util.*; - -@Analysis(description = "Evaluation summary for multi-allelic variants") -public class MultiallelicAFs extends VariantEvaluator { - final protected static Logger logger = Logger.getLogger(MultiallelicAFs.class); - - public enum Type { - SNP, INDEL - } - - @DataPoint(description="Histogram of allele frequencies for most common SNP alternate allele") - AFHistogram AFhistogramMaxSnp = new AFHistogram(); - - @DataPoint(description="Histogram of allele frequencies for less common SNP alternate alleles") - AFHistogram AFhistogramMinSnp = new AFHistogram(); - - @DataPoint(description="Histogram of allele frequencies for most common Indel alternate allele") - AFHistogram AFhistogramMaxIndel = new AFHistogram(); - - @DataPoint(description="Histogram of allele frequencies for less common Indel alternate alleles") - AFHistogram AFhistogramMinIndel = new AFHistogram(); - - /* - * AF histogram table object - */ - static class AFHistogram implements TableType { - private Object[] rowKeys, colKeys = {"count"}; - private int[] AFhistogram; - - private static final double AFincrement = 0.01; - private static final int numBins = (int)(1.00 / AFincrement); - - public AFHistogram() { - rowKeys = initRowKeys(); - AFhistogram = new int[rowKeys.length]; - } - - public Object[] getColumnKeys() { - return colKeys; - } - - public Object[] getRowKeys() { - return rowKeys; - } - - public Object getCell(int row, int col) { - return AFhistogram[row]; - } - - private static Object[] initRowKeys() { - ArrayList keyList = new ArrayList(numBins + 1); - for ( double a = 0.00; a <= 1.01; a += AFincrement ) { - keyList.add(String.format("%.2f", a)); - } - return keyList.toArray(); - } - - public String getName() { return "AFHistTable"; } - - public void update(final double AF) { - final int bin = (int)(numBins * MathUtils.round(AF, 2)); - AFhistogram[bin]++; - } - } - - public void initialize(VariantEvalWalker walker) {} - - @Override public boolean enabled() { return true; } - - public int getComparisonOrder() { - return 2; - } - - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {} - - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( eval == null || eval.isMonomorphicInSamples() ) - return null; - - if ( !eval.isBiallelic() ) - return null; - - // update counts - switch ( eval.getType() ) { - case SNP: - updateAFhistogram(eval, AFhistogramMaxSnp, AFhistogramMinSnp); - break; - case INDEL: - updateAFhistogram(eval, AFhistogramMaxIndel, AFhistogramMinIndel); - break; - default: - throw new UserException.BadInput("Unexpected variant context type: " + eval); - } - - return null; // we don't capture any interesting sites - } - - private void updateAFhistogram(VariantContext vc, AFHistogram max, AFHistogram min) { - - final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null); - if ( obj == null || !(obj instanceof List) ) - return; - - List list = (List)obj; - ArrayList AFs = new ArrayList(list.size()); - for ( String str : list ) { - AFs.add(Double.valueOf(str)); - } - - Collections.sort(AFs); - max.update(AFs.get(AFs.size()-1)); - for ( int i = 0; i < AFs.size() - 1; i++ ) - min.update(AFs.get(i)); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java index 5cea0322f..90c2def0b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java @@ -28,11 +28,12 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @Analysis(description = "Evaluation summary for multi-allelic variants") public class MultiallelicSummary extends VariantEvaluator implements StandardEval { @@ -87,17 +88,8 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva public String indelNoveltyRate = "NA"; - public void initialize(VariantEvalWalker walker) {} - @Override public boolean enabled() { return true; } - - public int getComparisonOrder() { - return 2; - } - - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); - } + @Override public int getComparisonOrder() { return 2; } public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( eval == null || eval.isMonomorphicInSamples() ) @@ -156,13 +148,8 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva // TODO -- implement me } - private final String noveltyRate(final int all, final int known) { - final int novel = all - known; - final double rate = (novel / (1.0 * all)); - return all == 0 ? "NA" : String.format("%.2f", rate); - } - public void finalizeEvaluation() { + nProcessedLoci = getWalker().getnProcessedLoci(); processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci; variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs; processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci; @@ -170,7 +157,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva TiTvRatio = (double)nTi / (double)nTv; - SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete); - indelNoveltyRate = noveltyRate(nMultiSNPs, knownIndelsPartial + knownIndelsComplete); + SNPNoveltyRate = formattedNoveltyRate(knownSNPsPartial + knownSNPsComplete, nMultiSNPs); + indelNoveltyRate = formattedNoveltyRate(knownIndelsPartial + knownIndelsComplete, nMultiSNPs); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PhaseStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PhaseStats.java deleted file mode 100755 index ab1f410f9..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PhaseStats.java +++ /dev/null @@ -1,54 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -/** - * Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings - * | File Templates. - */ -class NewPhaseStats { - public int neitherPhased; - public int onlyCompPhased; - public int onlyEvalPhased; - public int phasesAgree; - public int phasesDisagree; - - public NewPhaseStats() { - this.neitherPhased = 0; - this.onlyCompPhased = 0; - this.onlyEvalPhased = 0; - this.phasesAgree = 0; - this.phasesDisagree = 0; - } - - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("Neither phased: " + neitherPhased + "\tOnly Comp: " + onlyCompPhased + "\tOnly Eval: " + onlyEvalPhased + "\tSame phase: " + phasesAgree + "\tOpposite phase: " + phasesDisagree); - return sb.toString(); - } - - public static String[] getFieldNamesArray() { - return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"}; - } - - public Object getField(int index) { - switch (index) { - case (0): - return (neitherPhased + onlyCompPhased + onlyEvalPhased + phasesAgree + phasesDisagree); - case (1): - return neitherPhased; - case (2): - return onlyCompPhased; - case (3): - return onlyEvalPhased; - case (4): - return (phasesAgree + phasesDisagree); - case (5): - return phasesAgree; - case (6): - return phasesDisagree; - case (7): - return ((phasesDisagree == 0) ? 0 : ((double) phasesDisagree) / (phasesAgree + phasesDisagree)); - default: - return -1; - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java deleted file mode 100755 index 751f61a97..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SamplePreviousGenotypes.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.variantcontext.Genotype; - -import java.util.HashMap; - -/** - * Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings - * | File Templates. - */ -class NewSamplePreviousGenotypes { - private HashMap sampleGenotypes = null; - - public NewSamplePreviousGenotypes() { - this.sampleGenotypes = new HashMap(); - } - - public CompEvalGenotypes get(String sample) { - return sampleGenotypes.get(sample); - } - - public void put(String sample, CompEvalGenotypes compEvalGts) { - sampleGenotypes.put(sample, compEvalGts); - } - - public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) { - sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt)); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index 83a1c2f3b..d5cf685de 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -4,14 +4,18 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationContext; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Collection; - public abstract class VariantEvaluator { - public void initialize(VariantEvalWalker walker) {} + private VariantEvalWalker walker; + + public void initialize(VariantEvalWalker walker) { + this.walker = walker; + } + + public VariantEvalWalker getWalker() { + return walker; + } public abstract boolean enabled(); @@ -19,9 +23,8 @@ public abstract class VariantEvaluator { public abstract int getComparisonOrder(); // called at all sites, regardless of eval context itself; useful for counting processed bases - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - - } + // No longer available. The processed bp is kept in VEW itself for performance reasons + // public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return null; @@ -45,8 +48,46 @@ public abstract class VariantEvaluator { return ((double)num) / (Math.max(denom, 1)); } - public boolean stateIsApplicable(StateKey stateKey) { - return true; + /** + * Returns true if the variant in vc was a singleton in the original input evaluation + * set, regardless of variant context subsetting that has occurred. + * @param eval the VariantContext being assessed for this previous status as a singleton + * @return true if eval was originally a singleton site + */ + protected static boolean variantWasSingleton(final VariantContext eval) { + return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false); } + /** + * Convenience function that formats the novelty rate as a %.2f string + * + * @param known number of variants from all that are known + * @param all number of all variants + * @return a String novelty rate, or NA if all == 0 + */ + protected static String formattedNoveltyRate(final int known, final int all) { + return formattedPercent(all - known, all); + } + + /** + * Convenience function that formats the novelty rate as a %.2f string + * + * @param x number of objects part of total that meet some criteria + * @param total count of all objects, including x + * @return a String percent rate, or NA if total == 0 + */ + protected static String formattedPercent(final int x, final int total) { + return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total)); + } + + /** + * Convenience function that formats a ratio as a %.2f string + * + * @param num number of observations in the numerator + * @param denom number of observations in the denumerator + * @return a String formatted ratio, or NA if all == 0 + */ + protected static String formattedRatio(final int num, final int denom) { + return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom)); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java index ce9e45c9b..8417faf5f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java @@ -54,7 +54,7 @@ public class VariantQualityScore extends VariantEvaluator { @DataPoint(description = "average variant quality for each allele count") AlleleCountStats alleleCountStats = null; - static class TiTvStats implements TableType { + static class TiTvStats extends TableType { final static int NUM_BINS = 20; final HashMap> qualByIsTransition = new HashMap>(); // A hashMap holds all the qualities until we are able to bin them appropriately final long transitionByQuality[] = new long[NUM_BINS]; @@ -73,10 +73,6 @@ public class VariantQualityScore extends VariantEvaluator { return columnKeys; } - public String getName() { - return "TiTvStats"; - } - public String getCell(int x, int y) { return String.valueOf(titvByQuality[y]); } @@ -143,7 +139,7 @@ public class VariantQualityScore extends VariantEvaluator { } } - class AlleleCountStats implements TableType { + class AlleleCountStats extends TableType { final HashMap> qualityListMap = new HashMap>(); final HashMap qualityMap = new HashMap(); @@ -163,10 +159,6 @@ public class VariantQualityScore extends VariantEvaluator { return new String[]{"alleleCount","avgQual"}; } - public String getName() { - return "AlleleCountStats"; - } - public String getCell(int x, int y) { int iii = 0; for( final Integer key : qualityListMap.keySet() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java index aa3eff756..64161ac34 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java @@ -49,7 +49,6 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { /** Indels with size greater than this value are tallied in the CNV column */ private final static int MAX_INDEL_LENGTH = 50; private final static double MIN_CNV_OVERLAP = 0.5; - private VariantEvalWalker walker; public enum Type { SNP, INDEL, CNV @@ -152,7 +151,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { public void initialize(VariantEvalWalker walker) { - this.walker = walker; + super.initialize(walker); nSamples = walker.getSampleNamesForEvaluation().size(); countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation()); @@ -176,11 +175,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { return 2; // we only need to see each eval track } - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); - } - - private final Type getType(VariantContext vc) { + private Type getType(VariantContext vc) { switch (vc.getType()) { case SNP: return Type.SNP; @@ -196,9 +191,9 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { } } - private final boolean overlapsKnownCNV(VariantContext cnv) { + private boolean overlapsKnownCNV(VariantContext cnv) { if ( knownCNVs != null ) { - final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true); + final GenomeLoc loc = getWalker().getGenomeLocParser().createGenomeLoc(cnv, true); IntervalTree intervalTree = knownCNVs.get(loc.getContig()); final Iterator> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop()); @@ -252,15 +247,14 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { return null; // we don't capture any interesting sites } - private final String noveltyRate(Type type) { + private String noveltyRate(Type type) { final int all = allVariantCounts.all(type); final int known = knownVariantCounts.all(type); - final int novel = all - known; - final double rate = (novel / (1.0 * all)); - return all == 0 ? "NA" : String.format("%.2f", rate); + return formattedNoveltyRate(known, all); } public void finalizeEvaluation() { + nProcessedLoci = getWalker().getnProcessedLoci(); nSNPs = allVariantCounts.all(Type.SNP); nIndels = allVariantCounts.all(Type.INDEL); nSVs = allVariantCounts.all(Type.CNV); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/IndelHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/IndelHistogram.java new file mode 100644 index 000000000..a6c86d3da --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/IndelHistogram.java @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval.util; + +import org.broadinstitute.sting.utils.variantcontext.Allele; + +import java.util.*; + +/** + * Simple utility for histogramming indel lengths + * + * Based on code from chartl + * + * @author Mark DePristo + * @since 3/21/12 + */ +public class IndelHistogram extends TableType { + private final boolean asFrequencies; + int nIndels = 0, nTooBigDeletions = 0, nTooBigInsertions = 0; + private final Integer[] rowKeys; + + private Map frequencies = null; + private final Map counts = new HashMap(); + + public IndelHistogram(int maxSize, boolean asFrequencies) { + this.asFrequencies = asFrequencies; + initializeCounts(maxSize); + this.rowKeys = new ArrayList(counts.keySet()).toArray(new Integer[maxSize]); + } + + private void initializeCounts(int size) { + for ( int i = -size; i <= size; i++ ) { + if ( i != 0 ) counts.put(i, 0); + } + } + + @Override + public String getRowName() { + return "Length"; + } + + @Override + public Object[] getColumnKeys() { + return new String[]{"Count"}; + } + + @Override + public Object[] getRowKeys() { + return rowKeys; + } + + @Override + public Object getCell(int row, int col) { + final int key = (Integer)getRowKeys()[row]; + if ( asFrequencies ) { + if ( frequencies == null ) { + frequencies = new HashMap(); + for ( final int len : counts.keySet() ) { + final double value = nIndels == 0 ? 0.0 : counts.get(len) / (1.0 * nIndels); + frequencies.put(len, value); + } + } + return frequencies.get(key); + } + return counts.get(key); + } + + public int getnTooBigDeletions() { + return nTooBigDeletions; + } + + public int getnTooBigInsertions() { + return nTooBigInsertions; + } + + public void update(final Allele ref, final Allele alt) { + final int alleleSize = alt.length() - ref.length(); + update(alleleSize); + } + + public void update(int len) { + if ( counts.containsKey(len) ) { + nIndels++; + counts.put(len, counts.get(len) + 1); + } else if ( len < 0 ) { + nTooBigDeletions++; + } else { + nTooBigInsertions++; + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java index c34e44516..09f2c0168 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/NewEvaluationContext.java @@ -10,34 +10,20 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.HashMap; -import java.util.Set; -import java.util.TreeMap; +import java.util.*; public class NewEvaluationContext extends HashMap { - public TreeMap evaluationInstances; - - public String toString() { - String value = ""; - - for ( VariantStratifier key : this.keySet() ) { - value += "\t" + key.getName() + ":" + this.get(key) + "\n"; - } - - return value; - } + private Map evaluationInstances; public void addEvaluationClassList(VariantEvalWalker walker, StateKey stateKey, Set> evaluationClasses) { - evaluationInstances = new TreeMap(); + evaluationInstances = new LinkedHashMap(evaluationClasses.size()); - for ( Class c : evaluationClasses ) { + for ( final Class c : evaluationClasses ) { try { - VariantEvaluator eval = c.newInstance(); + final VariantEvaluator eval = c.newInstance(); eval.initialize(walker); - if (eval.stateIsApplicable(stateKey)) { - evaluationInstances.put(c.getSimpleName(), eval); - } + evaluationInstances.put(c.getSimpleName(), eval); } catch (InstantiationException e) { throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'"); } catch (IllegalAccessException e) { @@ -47,13 +33,11 @@ public class NewEvaluationContext extends HashMap { } public TreeMap getEvaluationClassList() { - return evaluationInstances; + return new TreeMap(evaluationInstances); } public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) { - for ( VariantEvaluator evaluation : evaluationInstances.values() ) { - // we always call update0 in case the evaluation tracks things like number of bases covered - + for ( final VariantEvaluator evaluation : evaluationInstances.values() ) { // the other updateN methods don't see a null context if ( tracker == null ) continue; @@ -77,10 +61,4 @@ public class NewEvaluationContext extends HashMap { } } } - - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - for ( VariantEvaluator evaluation : evaluationInstances.values() ) { - evaluation.update0(tracker, ref, context); - } - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/TableType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/TableType.java index 7ffc3e2c8..6ab7d1af3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/TableType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/TableType.java @@ -9,9 +9,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; * * an interface for turning arbritary objects into tables */ -public interface TableType { - public Object[] getRowKeys(); - public Object[] getColumnKeys(); - public Object getCell(int x, int y); - public String getName(); +public abstract class TableType { + public abstract Object[] getRowKeys(); + public abstract Object[] getColumnKeys(); + public abstract Object getCell(int x, int y); + public String getName() { return getClass().getSimpleName(); } + public String getRowName() { return "row"; } + } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 44af9f574..91c7140e6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -191,7 +191,7 @@ public class VariantEvalUtils { * @return a map of all the evaluation contexts */ public HashMap initializeEvaluationContexts(Set stratificationObjects, Set> evaluationObjects, Stack stratStack, NewEvaluationContext ec) { - HashMap ecs = new HashMap(); + HashMap ecs = new LinkedHashMap(); if (stratStack == null) { stratStack = new Stack(); @@ -310,7 +310,7 @@ public class VariantEvalUtils { final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount(); if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) { - builder.attribute("ISSINGLETON", true); + builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true); } VariantContextUtils.calculateChromosomeCounts(builder, true); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index fac5b22aa..6b36f4e1b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -184,6 +184,8 @@ public class ApplyRecalibration extends RodWalker { } } catch ( FileNotFoundException e ) { throw new UserException.CouldNotReadInputFile(RECAL_FILE, e); + } catch ( Exception e ) { + throw new UserException.MalformedFile(RECAL_FILE, "Could not parse LOD and annotation information in input recal file. File is somehow malformed."); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 684b9102a..3066b0bc6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -243,6 +243,19 @@ public class CombineVariants extends RodWalker { if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) { Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); + + // TODO -- clean this up in a refactoring + // merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type) + if ( VCsByType.containsKey(VariantContext.Type.NO_VARIATION) && VCsByType.size() > 1 ) { + final List refs = VCsByType.remove(VariantContext.Type.NO_VARIATION); + for ( VariantContext.Type type : VariantContext.Type.values() ) { + if ( VCsByType.containsKey(type) ) { + VCsByType.get(type).addAll(refs); + break; + } + } + } + // iterate over the types so that it's deterministic for (VariantContext.Type type : VariantContext.Type.values()) { if (VCsByType.containsKey(type)) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 36674adee..cf44e7c36 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -25,248 +25,267 @@ package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.bqsr.*; +import org.broadinstitute.sting.gatk.walkers.recalibration.EmpiricalQual; import org.broadinstitute.sting.utils.BitSetUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.text.XReadLines; import java.io.File; -import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashMap; -import java.util.List; -import java.util.regex.Pattern; +import java.util.*; /** * Utility methods to facilitate on-the-fly base quality score recalibration. * - * User: rpoplin + * User: carneiro and rpoplin * Date: 2/4/12 */ public class BaseRecalibration { + private List qualQuantizationMap; // histogram containing the map for qual quantization (calculated after recalibration is done) + private LinkedHashMap> keysAndTablesMap; // quick access reference to the read group table and its key manager - private ArrayList> collapsedHashes = new ArrayList> (); // All the collapsed data tables + private ArrayList requestedCovariates = new ArrayList(); // list of all covariates to be used in this calculation - private final ArrayList requestedCovariates = new ArrayList(); // List of all covariates to be used in this calculation - private final ArrayList requiredCovariates = new ArrayList(); // List of required covariates to be used in this calculation - private final ArrayList optionalCovariates = new ArrayList(); // List of optional covariates to be used in this calculation - - public static final Pattern REQUIRED_COVARIATE_PATTERN = Pattern.compile("^# Required Covariates.*"); - public static final Pattern OPTIONAL_COVARIATE_PATTERN = Pattern.compile("^# Optional Covariates.*"); - public static final String EOF_MARKER = "EOF"; + private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code"; + private static String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here"; - private static final byte SMOOTHING_CONSTANT = 1; - - ArrayList keyManagers = new ArrayList(); + /** + * Should ALWAYS use the constructor with the GATK Report file + */ + private BaseRecalibration() {} + /** + * Constructor using a GATK Report file + * + * @param RECAL_FILE a GATK Report file containing the recalibration information + */ public BaseRecalibration(final File RECAL_FILE) { - // Get a list of all available covariates - final List> classes = new PluginManager(Covariate.class).getPlugins(); - RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // todo -- initialize with the parameters from the csv file! + GATKReport report = new GATKReport(RECAL_FILE); - int lineNumber = 0; + GATKReportTable argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE); + RecalibrationArgumentCollection RAC = initializeArgumentCollectionTable(argumentTable); - boolean foundRequiredCovariates = false; - boolean foundOptionalCovariates = false; - boolean initializedKeyManagers = false; + GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE); + qualQuantizationMap = initializeQuantizationTable(quantizedTable); - // Read in the data from the csv file and populate the data map and covariates list - boolean sawEOF = false; - try { - for (String line : new XReadLines(RECAL_FILE)) { - lineNumber++; + Pair, ArrayList> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates + ArrayList requiredCovariates = covariates.getFirst(); + ArrayList optionalCovariates = covariates.getSecond(); + requestedCovariates.addAll(requiredCovariates); // add all required covariates to the list of requested covariates + requestedCovariates.addAll(optionalCovariates); // add all optional covariates to the list of requested covariates - sawEOF = EOF_MARKER.equals(line); - if (sawEOF) - break; - - boolean requiredCovariatesLine = REQUIRED_COVARIATE_PATTERN.matcher(line).matches(); - boolean optionalCovariatesLine = OPTIONAL_COVARIATE_PATTERN.matcher(line).matches(); - - if (requiredCovariatesLine && foundRequiredCovariates) - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Duplicate required covariates line"); - - if (optionalCovariatesLine && foundOptionalCovariates) - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Duplicate optional covariates line"); - - if (optionalCovariatesLine && !foundRequiredCovariates) - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Optional covariates reported before Required covariates"); - - if (requiredCovariatesLine || optionalCovariatesLine) { - String [] covariateNames = line.split(": ")[1].split(","); // take the second half of the string (past the ":") and split it by "," to get the list of required covariates - - List covariateList = requiredCovariatesLine ? requiredCovariates : optionalCovariates; // set the appropriate covariate list to update - - for (String covariateName : covariateNames) { - boolean foundClass = false; - for (Class covClass : classes) { - if ((covariateName + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) { - foundClass = true; - try { - Covariate covariate = (Covariate) covClass.newInstance(); - covariate.initialize(RAC); - requestedCovariates.add(covariate); - covariateList.add(covariate); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - } - } - if (!foundClass) - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (covariateName + "Covariate") + ") isn't a valid covariate option."); - } - foundRequiredCovariates = foundRequiredCovariates || requiredCovariatesLine; - foundOptionalCovariates = foundOptionalCovariates || optionalCovariatesLine; - } - - else if (!line.startsWith("#")) { // if this is not a comment line that we don't care about, it is DATA! - if (!foundRequiredCovariates || !foundOptionalCovariates) // At this point all the covariates should have been found and initialized - throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE); - - if (!initializedKeyManagers) { - ArrayList emptyList = new ArrayList(0); - ArrayList requiredCovariatesUpToThis = new ArrayList(); // Initialize one key manager for each table of required covariate - for (Covariate covariate : requiredCovariates) { // Every required covariate table includes all preceding required covariates (e.g. RG ; RG,Q ) - requiredCovariatesUpToThis.add(covariate); - keyManagers.add(new BQSRKeyManager(requiredCovariatesUpToThis, emptyList)); - } - keyManagers.add(new BQSRKeyManager(requiredCovariates, optionalCovariates)); // One master key manager for the collapsed tables - - initializedKeyManagers = true; - } - addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap - } - } - - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e); - } catch (NumberFormatException e) { - throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker."); - } - - if (!sawEOF) { - final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool."; - throw new UserException.MalformedFile(RECAL_FILE, errorMessage); - } - - generateEmpiricalQualities(SMOOTHING_CONSTANT); - } - - - /** - * For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches) - * - * @param file The CSV file we read the line from (for exception throwing purposes) - * @param line A line of CSV data read from the recalibration table data file - */ - private void addCSVData(final File file, final String line) { - final String[] vals = line.split(","); - boolean hasOptionalCovariates = optionalCovariates.size() > 0; // Do we have optional covariates in this key? - int addOptionalCovariates = hasOptionalCovariates ? 2 : 0; // If we have optional covariates at all, add two to the size of the array (to acommodate the covariate and the id) - final Object[] key = new Object[requiredCovariates.size() + addOptionalCovariates + 1]; // Reserve enough space for the required covariates, optional covariate, id and eventType - - int indexCovariateValue = key.length - 3; // In the order of keys, the optional covariate comes right after the required covariates - int indexCovariateID = key.length - 2; // followed by the covariate ID - int indexEventType = key.length - 1; // and the event type - - addKeysToArray(key, vals, requiredCovariates, 0); // Add the required covariates keys - - if (hasOptionalCovariates) { - key[indexCovariateID] = Short.parseShort(vals[indexCovariateID]); // Add the optional covariate ID - Covariate covariate = optionalCovariates.get((Short) key[indexCovariateID]); // Get the covariate object for this ID - key[indexCovariateValue] = covariate.getValue(vals[indexCovariateValue]); // Add the optional covariate value, given the ID - } - key[indexEventType] = EventType.eventFrom(vals[indexEventType]); // Add the event type - - int datumIndex = key.length; // The recal datum starts at the end of the key (after the event type) - long count = Long.parseLong(vals[datumIndex]); // Number of observations - long errors = Long.parseLong(vals[datumIndex + 1]); // Number of errors observed - double reportedQual = Double.parseDouble(vals[1]); // The reported Q score --> todo -- I don't like having the Q score hard coded in vals[1]. Generalize it! - final RecalDatum datum = new RecalDatum(count, errors, reportedQual, 0.0); // Create a new datum using the number of observations, number of mismatches, and reported quality score - - addToAllTables(key, datum); // Add that datum to all the collapsed tables which will be used in the sequential calculation - } - - /** - * Add the given mapping to all of the collapsed hash tables - * - * @param key The list of comparables that is the key for this mapping - * @param fullDatum The RecalDatum which is the data for this mapping - */ - private void addToAllTables(final Object[] key, final RecalDatum fullDatum) { - int nHashes = requiredCovariates.size(); // We will always need one hash per required covariate - if (optionalCovariates.size() > 0) // If we do have optional covariates - nHashes += 1; // we will need one extra hash table with the optional covariate encoded in the key set on top of the required covariates + for (Covariate cov : requestedCovariates) + cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection - - for (int hashIndex = 0; hashIndex < nHashes; hashIndex++) { - HashMap table; // object to hold the hash table we are going to manipulate - if (hashIndex >= collapsedHashes.size()) { // if we haven't yet created the collapsed hash table for this index, create it now! - table = new HashMap(); - collapsedHashes.add(table); // Because this is the only place where we add tables to the ArrayList, they will always be in the order we want. - } - else - table = collapsedHashes.get(hashIndex); // if the table has been previously created, just assign it to the "table" object for manipulation - - int copyTo = hashIndex + 1; // this will copy the covariates up to the index of the one we are including now (1 for RG, 2 for QS,...) - if (copyTo > requiredCovariates.size()) // only in the case where we have optional covariates we need to increase the size of the array - copyTo = requiredCovariates.size() + 2; // if we have optional covarites, add the optional covariate and it's id to the size of the key - Object[] tableKey = new Object[copyTo + 1]; // create a new array that will hold as many keys as hashIndex (1 for RG hash, 2 for QualityScore hash, 3 for covariate hash plus the event type - System.arraycopy(key, 0, tableKey, 0, copyTo); // copy the keys for the corresponding covariates into the tableKey. - tableKey[tableKey.length-1] = key[key.length - 1]; // add the event type. The event type is always the last key, on both key sets. + keysAndTablesMap = new LinkedHashMap>(); + ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. + ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables + for (Covariate covariate : requiredCovariates) { + requiredCovariatesToAdd.add(covariate); + final Map table; // initializing a new recal table for each required covariate (cumulatively) + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager - BitSet hashKey = keyManagers.get(hashIndex).bitSetFromKey(tableKey); // Add bitset key with fullDatum to the appropriate hash - RecalDatum datum = table.get(hashKey); - if (datum == null) - datum = fullDatum; - else if (hashIndex == 0) // Special case for the ReadGroup covariate - datum.combine(fullDatum); + int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES) + if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table + final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE); + table = parseReadGroupTable(keyManager, reportTable); + } + else if (nRequiredCovariates == 2 && optionalCovariatesToAdd.isEmpty()) { // when we have both required covariates and no optional covariates we're at the QUAL table + final GATKReportTable reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE); + table = parseQualityScoreTable(keyManager, reportTable); + } else - datum.increment(fullDatum); - table.put(hashKey, datum); + throw new ReviewedStingException(UNRECOGNIZED_REPORT_TABLE_EXCEPTION); + + keysAndTablesMap.put(keyManager, table); // adding the pair key+table to the map } + + + final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager + final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE); + final Map table = parseAllCovariatesTable(keyManager, reportTable); + keysAndTablesMap.put(keyManager, table); // adding the pair table+key to the map } /** - * Loop over all the collapsed tables and turn the recalDatums found there into an empirical quality score - * that will be used in the sequential calculation in TableRecalibrationWalker + * Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table * - * @param smoothing The smoothing parameter that goes into empirical quality score calculation + * @param keyManager the key manager for this table + * @param reportTable the GATKReport table containing data for this table + * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private void generateEmpiricalQualities(final int smoothing) { - for (final HashMap table : collapsedHashes) - for (final RecalDatum datum : table.values()) - datum.calcCombinedEmpiricalQuality(smoothing, QualityUtils.MAX_QUAL_SCORE); + private Map parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + ArrayList columnNamesOrderedList = new ArrayList(5); + columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.COVARIATE_NAME_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); + return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList); } + /** + * + * Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table + * @param keyManager the key manager for this table + * @param reportTable the GATKReport table containing data for this table + * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. + */ + private Map parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + ArrayList columnNamesOrderedList = new ArrayList(3); + columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); + return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList); + } + /** + * Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table + * + * @param keyManager the key manager for this table + * @param reportTable the GATKReport table containing data for this table + * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. + */ + private Map parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + ArrayList columnNamesOrderedList = new ArrayList(2); + columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); + columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); + return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList); + } + /** + * Shared parsing functionality for all tables. + * + * @param keyManager the key manager for this table + * @param reportTable the GATKReport table containing data for this table + * @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table + * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. + */ + private Map genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList columnNamesOrderedList) { + Map result = new HashMap(reportTable.getNumRows()*2); - public void recalibrateRead(final GATKSAMRecord read) { - //compute all covariate values for this read - RecalDataManager.computeCovariates(read, requestedCovariates); - final ReadCovariates readCovariates = RecalDataManager.covariateKeySetFrom(read); + for (Object primaryKey : reportTable.getPrimaryKeys()) { + int nKeys = columnNamesOrderedList.size(); + Object [] keySet = new Object[nKeys]; + for (int i = 0; i < nKeys; i++) + keySet[i] = reportTable.get(primaryKey, columnNamesOrderedList.get(i)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below) + keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager). + BitSet bitKey = keyManager.bitSetFromKey(keySet); - for (final EventType errorModel : EventType.values()) { + double estimatedQReported = (Double) reportTable.get(primaryKey, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME); + double empiricalQuality = (Double) reportTable.get(primaryKey, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME); + EmpiricalQual empiricalQual = new EmpiricalQual(estimatedQReported, empiricalQuality); + + result.put(bitKey, empiricalQual); + } + return result; + } + + /** + * Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores + * + * @param table the GATKReportTable containing the quantization mappings + * @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE + */ + private List initializeQuantizationTable(GATKReportTable table) { + Byte[] result = new Byte[QualityUtils.MAX_QUAL_SCORE + 1]; + for (Object primaryKey : table.getPrimaryKeys()) { + Object value = table.get(primaryKey, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); + byte originalQual = Byte.parseByte(primaryKey.toString()); + byte quantizedQual = Byte.parseByte(value.toString()); + result[originalQual] = quantizedQual; + } + return Arrays.asList(result); + } + + /** + * Parses the arguments table from the GATK Report and creates a RAC object with the proper initialization values + * + * @param table the GATKReportTable containing the arguments and its corresponding values + * @return a RAC object properly initialized with all the objects in the table + */ + private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) { + RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); + + for (Object primaryKey : table.getPrimaryKeys()) { + Object value = table.get(primaryKey, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + if (value.equals("null")) + value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport + + if (primaryKey.equals("covariate") && value != null) + RAC.COVARIATES = value.toString().split(","); + + else if (primaryKey.equals("standard_covs")) + RAC.USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value); + + else if (primaryKey.equals("solid_recal_mode")) + RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value); + + else if (primaryKey.equals("solid_nocall_strategy")) + RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value); + + else if (primaryKey.equals("mismatches_context_size")) + RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value); + + else if (primaryKey.equals("insertions_context_size")) + RAC.INSERTIONS_CONTEXT_SIZE = Integer.parseInt((String) value); + + else if (primaryKey.equals("deletions_context_size")) + RAC.DELETIONS_CONTEXT_SIZE = Integer.parseInt((String) value); + + else if (primaryKey.equals("mismatches_default_quality")) + RAC.MISMATCHES_DEFAULT_QUALITY = Byte.parseByte((String) value); + + else if (primaryKey.equals("insertions_default_quality")) + RAC.INSERTIONS_DEFAULT_QUALITY = Byte.parseByte((String) value); + + else if (primaryKey.equals("deletions_default_quality")) + RAC.DELETIONS_DEFAULT_QUALITY = Byte.parseByte((String) value); + + else if (primaryKey.equals("low_quality_tail")) + RAC.LOW_QUAL_TAIL = Byte.parseByte((String) value); + + else if (primaryKey.equals("default_platform")) + RAC.DEFAULT_PLATFORM = (String) value; + + else if (primaryKey.equals("force_platform")) + RAC.FORCE_PLATFORM = (String) value; + + else if (primaryKey.equals("quantizing_levels")) + RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value); + } + + return RAC; + } + + /** + * Recalibrates the base qualities of a read + * + * It updates the base qualities of the read with the new recalibrated qualities (for all event types) + * + * @param read the read to recalibrate + */ + public void recalibrateRead(final GATKSAMRecord read) { + final ReadCovariates readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates); // compute all covariates for the read + for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings final byte[] originalQuals = read.getBaseQualities(errorModel); final byte[] recalQuals = originalQuals.clone(); - // For each base in the read - for (int offset = 0; offset < read.getReadLength(); offset++) { - final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); - final byte qualityScore = performSequentialQualityCalculation(keySet, errorModel); + for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read + byte qualityScore = originalQuals[offset]; + + if (qualityScore > QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) + final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model + qualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base + } recalQuals[offset] = qualityScore; } - - preserveQScores(originalQuals, recalQuals); // Overwrite the work done if original quality score is too low read.setBaseQualities(recalQuals, errorModel); } } @@ -286,86 +305,66 @@ public class BaseRecalibration { * * Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... ) * - * todo -- I extremely dislike the way all this math is hardcoded... should rethink the data structures for this method in particular. - * - * @param key The list of Comparables that were calculated from the covariates + * @param key The list of Comparables that were calculated from the covariates * @param errorModel the event type * @return A recalibrated quality score as a byte */ private byte performSequentialQualityCalculation(BitSet[] key, EventType errorModel) { final byte qualFromRead = (byte) BitSetUtils.shortFrom(key[1]); - - final int readGroupKeyIndex = 0; - final int qualKeyIndex = 1; - final int covariatesKeyIndex = 2; - - // The global quality shift (over the read group only) - List bitKeys = keyManagers.get(readGroupKeyIndex).bitSetsFromAllKeys(key, errorModel); - if (bitKeys.size() > 1) - throw new ReviewedStingException("There should only be one key for the RG collapsed table, something went wrong here"); - - final RecalDatum globalRecalDatum = collapsedHashes.get(readGroupKeyIndex).get(bitKeys.get(0)); + double globalDeltaQ = 0.0; - if (globalRecalDatum != null) { - final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality(); - final double aggregrateQReported = globalRecalDatum.getEstimatedQReported(); - globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported; - } - - // The shift in quality between reported and empirical - bitKeys = keyManagers.get(qualKeyIndex).bitSetsFromAllKeys(key, errorModel); - if (bitKeys.size() > 1) - throw new ReviewedStingException("There should only be one key for the Qual collapsed table, something went wrong here"); - - final RecalDatum qReportedRecalDatum = collapsedHashes.get(qualKeyIndex).get(bitKeys.get(0)); double deltaQReported = 0.0; - if (qReportedRecalDatum != null) { - final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality(); - deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; - } - - // The shift in quality due to each covariate by itself in turn - bitKeys = keyManagers.get(covariatesKeyIndex).bitSetsFromAllKeys(key, errorModel); double deltaQCovariates = 0.0; - double deltaQCovariateEmpirical; - for (BitSet k : bitKeys) { - final RecalDatum covariateRecalDatum = collapsedHashes.get(covariatesKeyIndex).get(k); - if (covariateRecalDatum != null) { - deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality(); - deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported)); + + for (Map.Entry> mapEntry : keysAndTablesMap.entrySet()) { + BQSRKeyManager keyManager = mapEntry.getKey(); + Map table = mapEntry.getValue(); + + switch(keyManager.getRequiredCovariates().size()) { + case 1: // this is the ReadGroup table + List bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group + if (bitKeys.size() > 1) + throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION); + + final EmpiricalQual empiricalQualRG = table.get(bitKeys.get(0)); + if (empiricalQualRG != null) { + final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality(); + final double aggregrateQReported = empiricalQualRG.getEstimatedQReported(); + globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported; + } + break; + case 2: + if (keyManager.getOptionalCovariates().isEmpty()) { // this is the QualityScore table + bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to the reported quality score + if (bitKeys.size() > 1) + throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION); + + final EmpiricalQual empiricalQualQS = table.get(bitKeys.get(0)); + if (empiricalQualQS != null) { + final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality(); + deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; + } + } + else { // this is the table with all the covariates + bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to each covariate by itself in turn + for (BitSet k : bitKeys) { + final EmpiricalQual empiricalQualCO = table.get(k); + if (empiricalQualCO != null) { + double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality(); + deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported)); + } + } + } + break; + default: + throw new ReviewedStingException(UNRECOGNIZED_REPORT_TABLE_EXCEPTION); } } - final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; - return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_QUAL_SCORE); + double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula + recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_QUAL_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL + + return qualQuantizationMap.get((int) recalibratedQual); // return the quantized version of the recalibrated quality } - /** - * Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold - * - * @param originalQuals The list of original base quality scores - * @param recalQuals A list of the new recalibrated quality scores - */ - private void preserveQScores(final byte[] originalQuals, final byte[] recalQuals) { - for (int iii = 0; iii < recalQuals.length; iii++) { - if (originalQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE) { //BUGBUG: used to be Q5 now is Q6, probably doesn't matter - recalQuals[iii] = originalQuals[iii]; - } - } - } - - /** - * Shared functionality to add keys - * - * @param array the target array we are creating the keys in - * @param keys the actual keys we're using as a source - * @param covariateList the covariate list to loop through - * @param keyIndex the index in the keys and the arrays objects to run from - */ - private void addKeysToArray(final Object[] array, final String[] keys, List covariateList, int keyIndex) { - for (Covariate covariate : covariateList) { - array[keyIndex] = covariate.getValue(keys[keyIndex]); - keyIndex++; - } - } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java index ff7d12f09..df1ff2a0e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java @@ -13,6 +13,8 @@ import org.broadinstitute.sting.utils.NGSPlatform; */ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { + public static String LANE_TAG = "LN"; + // the SAMReadGroupRecord data we're caching private String mSample = null; private String mPlatform = null; @@ -79,4 +81,12 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { return mNGSPlatform; } + + public String getLane() { + return this.getAttribute(LANE_TAG); + } + + public void setLane(String lane) { + this.setAttribute(LANE_TAG, lane); + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 6b43479dc..51c3715f3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -52,8 +52,8 @@ public class GATKSAMRecord extends BAMRecord { public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end // Base Quality Score Recalibrator specific attribute tags - public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; - public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; + public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions + public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions // the SAMRecord data we're caching private String mReadString = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 9d731e489..cbb4120dd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -698,6 +698,13 @@ public class ReadUtils { return bases; } + public static GATKSAMRecord createRandomRead(int length) { + byte[] quals = ReadUtils.createRandomReadQuals(length); + byte[] bbases = ReadUtils.createRandomReadBases(length, true); + return ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M"); + } + + public static String prettyPrintSequenceRecords ( SAMSequenceDictionary sequenceDictionary ) { String[] sequenceRecordNames = new String[sequenceDictionary.size()]; int sequenceRecordIndex = 0; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java index ae3dff9ed..1f3aead07 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java @@ -222,7 +222,10 @@ public class GenotypeLikelihoods { /* * a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles */ - private static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = new GenotypeLikelihoodsAllelePair[]{ new GenotypeLikelihoodsAllelePair(0, 0) }; + private static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex; + static { + calculatePLcache(65); // start with data for 10 alternate alleles + } private static void calculatePLcache(final int minIndex) { // how many alternate alleles do we need to calculate for? @@ -266,6 +269,7 @@ public class GenotypeLikelihoods { */ public static GenotypeLikelihoodsAllelePair getAllelePair(final int PLindex) { // make sure that we've cached enough data + // TODO -- this is not thread-safe! if ( PLindex >= PLIndexToAlleleIndex.length ) calculatePLcache(PLindex); diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index 8cc2ffd96..192c86fe3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -25,9 +25,13 @@ package org.broadinstitute.sting.gatk; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.Arrays; + /** * */ @@ -73,4 +77,45 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { @Test() private void testMissingInterval() { testMissingFile("missing interval", "-T UnifiedGenotyper -L missing.interval_list -I " + b37GoodBAM); } + + + // -------------------------------------------------------------------------------- + // + // Test that our exceptions are coming back as we expect + // + // -------------------------------------------------------------------------------- + + private class EngineErrorHandlingTestProvider extends TestDataProvider { + Class expectedException; + boolean multiThreaded; + + public EngineErrorHandlingTestProvider(Class exceptedException, final boolean multiThreaded) { + super(EngineErrorHandlingTestProvider.class); + this.expectedException = exceptedException; + this.multiThreaded = multiThreaded; + setName(String.format("Engine error handling: expected %s, is-multithreaded %b", exceptedException, multiThreaded)); + } + } + + @DataProvider(name = "EngineErrorHandlingTestProvider") + public Object[][] makeEngineErrorHandlingTestProvider() { + for ( final boolean multiThreaded : Arrays.asList(true, false)) { + new EngineErrorHandlingTestProvider(NullPointerException.class, multiThreaded); + new EngineErrorHandlingTestProvider(UserException.class, multiThreaded); + new EngineErrorHandlingTestProvider(ReviewedStingException.class, multiThreaded); + } + + return EngineErrorHandlingTestProvider.getTests(EngineErrorHandlingTestProvider.class); + } + + // + // Loop over errors to throw, make sure they are the errors we get back from the engine, regardless of NT type + // + @Test(dataProvider = "EngineErrorHandlingTestProvider") + public void testEngineErrorHandlingTestProvider(EngineErrorHandlingTestProvider cfg) { + final String root = "-T ErrorThrowing -R " + b37KGReference; + final String args = root + (cfg.multiThreaded ? " -nt 2" : "") + " -E " + cfg.expectedException.getSimpleName(); + WalkerTestSpec spec = new WalkerTestSpec(args, 0, cfg.expectedException); + executeTest(cfg.toString(), spec); + } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java index 30a9bad3e..2b4cb2ae3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java @@ -1,9 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; @@ -11,7 +9,6 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import java.util.BitSet; -import java.util.Random; /** * @author Mauricio Carneiro @@ -20,22 +17,18 @@ import java.util.Random; public class ContextCovariateUnitTest { ContextCovariate covariate; RecalibrationArgumentCollection RAC; - Random random; @BeforeClass public void init() { RAC = new RecalibrationArgumentCollection(); covariate = new ContextCovariate(); - random = GenomeAnalysisEngine.getRandomGenerator(); covariate.initialize(RAC); } @Test(enabled = true) public void testSimpleContexts() { - byte[] quals = ReadUtils.createRandomReadQuals(10000); - byte[] bbases = ReadUtils.createRandomReadBases(10000, true); - GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M"); + GATKSAMRecord read = ReadUtils.createRandomRead(1000); GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); CovariateValues values = covariate.getValues(read); verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, stringFrom(clippedRead.getReadBases())); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java index 49315672c..d80cddd3e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -10,7 +8,6 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import java.util.BitSet; -import java.util.Random; /** * @author Mauricio Carneiro @@ -19,22 +16,18 @@ import java.util.Random; public class CycleCovariateUnitTest { CycleCovariate covariate; RecalibrationArgumentCollection RAC; - Random random; @BeforeClass public void init() { RAC = new RecalibrationArgumentCollection(); covariate = new CycleCovariate(); - random = GenomeAnalysisEngine.getRandomGenerator(); covariate.initialize(RAC); } @Test(enabled = true) public void testSimpleCycles() { - short readLength = 10; - byte[] quals = ReadUtils.createRandomReadQuals(readLength); - byte[] bbases = ReadUtils.createRandomReadBases(readLength, true); - GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M"); + short readLength = 10; + GATKSAMRecord read = ReadUtils.createRandomRead(readLength); read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID")); read.getReadGroup().setPlatform("illumina"); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java new file mode 100644 index 000000000..6276022d1 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java @@ -0,0 +1,67 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.BitSet; + +/** + * @author Mauricio Carneiro + * @since 3/1/12 + */ +public class ReadGroupCovariateUnitTest { + ReadGroupCovariate covariate; + RecalibrationArgumentCollection RAC; + + @BeforeClass + public void init() { + RAC = new RecalibrationArgumentCollection(); + covariate = new ReadGroupCovariate(); + covariate.initialize(RAC); + } + + @Test(enabled = true) + public void testSingleRecord() { + final String expected = "SAMPLE.1"; + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.ID"); + rg.setSample("SAMPLE"); + rg.setLane("1"); + runTest(rg, expected); + } + + @Test(enabled = true) + public void testMissingLane() { + final String expected = "SAMPLE.7"; + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.7"); + rg.setSample("SAMPLE"); + runTest(rg, expected); + } + + @Test(enabled = true) + public void testMissingSample() { + final String expected = "MY.ID"; + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.ID"); + rg.setLane("1"); + runTest(rg, expected); + } + + private void runTest(GATKSAMReadGroupRecord rg, String expected) { + GATKSAMRecord read = ReadUtils.createRandomRead(10); + read.setReadGroup(rg); + CovariateValues values = covariate.getValues(read); + verifyCovariateArray(values.getMismatches(), expected); + + } + + private void verifyCovariateArray(BitSet[] values, String expected) { + for (BitSet value : values) { + String actual = covariate.keyFromBitSet(value); + Assert.assertEquals(actual, expected); + } + } + +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java index e1d22f107..b78e76e07 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -80,4 +80,24 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec); } + @Test + public void test7() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "CEU.trio.2010_03.genotypes.hg18.vcf", 20000, 10, 10) + + " -L chr20:332341-802503", + 1, + Arrays.asList("c37548b333b65f58d0edfc5c2a62a28a")); + executeTest("Use trio-phased VCF, but ignore its phasing [TEST SEVEN]", spec); + } + + @Test + public void test8() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "CEU.trio.2010_03.genotypes.hg18.vcf", 20000, 10, 10) + + " -L chr20:332341-802503" + " -respectPhaseInInput", + 1, + Arrays.asList("dfc7cdddd702e63d46d04f61a3ecd720")); + executeTest("Use trio-phased VCF, and respect its phasing [TEST EIGHT]", spec); + } + } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 9f69554fe..610733d9c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -94,7 +94,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("94fb8cba9e236131c6fbf1d7fee738fe") + Arrays.asList("7a726ecbedd722fa7cd4de3e023b7a82") ); executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); } @@ -115,7 +115,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("670979268b05c3024297ba98d67d89ab") + Arrays.asList("95bb4a4267a8f29dd7a8169561499f20") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -137,7 +137,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("c38ce9c872a76ae7dd26c3e353bf0765") + Arrays.asList("9b51029083495935823fb0447a2857b9") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } @@ -158,7 +158,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("2c37f23bf6114a2b27f21ed445806fd2") + Arrays.asList("318b5fbbc61e2fc11d49369359812edd") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); } @@ -179,7 +179,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("206f0d629de9af0b97340cb22d34a81b") + Arrays.asList("74c02df2ef69dda231a2aec2a948747b") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); } @@ -200,7 +200,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("bd869725429deae8f56175ba9a8ab390") + Arrays.asList("2d97b1fe15e532e89803ba7ba347ff20") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); } @@ -221,7 +221,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("9c7f6783a57ad681bb754b5e71de27dc") + Arrays.asList("474cbc231ddbc4ba299ffe61a17405b6") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); } @@ -244,7 +244,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("a2d280440aa3771937f3d2d10f1eea74") + Arrays.asList("2cc9bc4bbe8b4edb6dc27642ec41f66e") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); } @@ -269,7 +269,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("2925d811dd521beb00059f8c8e818d83") + Arrays.asList("00c94cf3e14bc2855d39bbefa27f9bb2") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); } @@ -288,7 +288,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("4b79bf2dfd73ddac0ceb0838a352bf9a") + Arrays.asList("a0c0d4805db1245aa30a306aa506096f") ); executeTest("testFundamentalsCountVariantsNoCompRod", spec); } @@ -301,7 +301,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("1739654de350541edf429888b708ae01")); + 1, Arrays.asList("2192418a70a8e018a1675d4f425155f3")); executeTestParallel("testSelect1", spec); } @@ -329,7 +329,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d57cf846bc26d338edcf181fb0c85535")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("2282523336c24d434d1cc0eb1697b4f9")); executeTestParallel("testCompVsEvalAC",spec); } @@ -359,7 +359,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --dbsnp " + b37dbSNP132 + " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("b663745a39f62bfa5b5d486811cf57ec")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ec321fcc424fbad74a4a74e739173d03")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -371,7 +371,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("f1e1b1469dca86d72ae79a2d3e10612c")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ccaea6245086552cd63f828eabddfaf3")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -449,7 +449,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("0c632b5be8a54e43afa576510b40c4da") + Arrays.asList("9954c769ef37c47d3b61481ab0807be0") ); executeTest("testAlleleCountStrat", spec); } @@ -470,7 +470,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("92404820a94e7cfb854ae73450a0fbd9") + Arrays.asList("c0d69ce7647a575d166d8bab5aa16299") ); executeTest("testIntervalStrat", spec); } @@ -487,7 +487,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("417875ab1924b7e7950fa10daee393d2") + Arrays.asList("9a8ffb506118c1bde6f7bfadc4fb6f10") ); executeTest("testModernVCFWithLargeIndels", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index d74aac79d..5282c9e58 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("ab72f4bfb16d3894942149173a087647")); + Arrays.asList("ee43a558fd3faeaa447acab89f0001d5")); executeTest("threeWayWithRefs", spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java index 3e50a5fd1..a372ef3f0 100644 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java @@ -1,5 +1,10 @@ package org.broadinstitute.sting.utils.recalibration; +import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.utils.NGSPlatform; +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.annotations.Test; import java.io.File; @@ -12,10 +17,13 @@ import java.io.File; */ public class BaseRecalibrationUnitTest { - @Test(enabled=true) - public void testReadingCSV() { - File csv = new File("public/testdata/exampleCSV.csv"); + @Test(enabled=false) + public void testReadingReport() { + File csv = new File("public/testdata/exampleGATKREPORT.grp"); BaseRecalibration baseRecalibration = new BaseRecalibration(csv); + GATKSAMRecord read = ReadUtils.createRandomRead(1000); + read.setReadGroup(new GATKSAMReadGroupRecord(new SAMReadGroupRecord("exampleBAM.bam.bam"), NGSPlatform.ILLUMINA)); + baseRecalibration.recalibrateRead(read); System.out.println("Success"); } } diff --git a/public/testdata/exampleCSV.csv b/public/testdata/exampleCSV.csv deleted file mode 100644 index 4bd052195..000000000 --- a/public/testdata/exampleCSV.csv +++ /dev/null @@ -1,1362 +0,0 @@ -# Counted Sites 312 -# Counted Bases 380 -# Skipped Sites 0 -# Fraction Skipped 1 / Infinity bp -# Required Covariates (in order): ReadGroup,QualityScore -# Optional Covariates (in order): Context,Cycle -# Recalibration Data (in order): CovariateID,EventType,nObservations,nMismatches,Qempirical -exampleBAM.bam,45,TGAAAGTG,0,D,1,0,40 -exampleBAM.bam,45,TGGTATTA,0,D,1,0,40 -exampleBAM.bam,45,AGCCTCGT,0,D,1,0,40 -exampleBAM.bam,45,CTGTGTCT,0,D,1,0,40 -exampleBAM.bam,45,CTTTGTAT,0,I,1,0,40 -exampleBAM.bam,45,CTTAAGTG,0,D,1,0,40 -exampleBAM.bam,45,CTTTATTA,0,D,1,0,40 -exampleBAM.bam,45,23,1,I,5,0,40 -exampleBAM.bam,45,27,1,D,5,0,40 -exampleBAM.bam,45,ATTCTATT,0,I,1,0,40 -exampleBAM.bam,45,CTAATCTC,0,I,1,0,40 -exampleBAM.bam,34,GC,0,M,2,0,40 -exampleBAM.bam,8,TG,0,M,3,0,40 -exampleBAM.bam,45,TAGAGTTT,0,I,1,0,40 -exampleBAM.bam,9,TA,0,M,1,0,40 -exampleBAM.bam,45,GGTTCGGG,0,I,3,0,40 -exampleBAM.bam,45,AGTTTCAC,0,I,1,0,40 -exampleBAM.bam,45,CATTTCAC,0,I,1,0,40 -exampleBAM.bam,16,7,1,M,1,0,40 -exampleBAM.bam,5,76,1,M,1,0,40 -exampleBAM.bam,45,CATGATAA,0,D,1,0,40 -exampleBAM.bam,45,53,1,I,5,0,40 -exampleBAM.bam,45,57,1,D,5,0,40 -exampleBAM.bam,25,52,1,M,1,0,40 -exampleBAM.bam,45,TGGCAGCC,0,D,1,0,40 -exampleBAM.bam,33,CT,0,M,6,0,40 -exampleBAM.bam,45,AAGTGACA,0,I,1,0,40 -exampleBAM.bam,45,AGTGACAT,0,I,1,0,40 -exampleBAM.bam,45,AGAGTTTC,0,I,1,0,40 -exampleBAM.bam,45,CTCTTTGT,0,D,1,0,40 -exampleBAM.bam,45,GCCTGAAA,0,D,1,0,40 -exampleBAM.bam,12,25,1,M,1,0,40 -exampleBAM.bam,34,75,1,M,1,0,40 -exampleBAM.bam,32,41,1,M,2,0,40 -exampleBAM.bam,21,GG,0,M,2,0,40 -exampleBAM.bam,26,50,1,M,1,0,40 -exampleBAM.bam,45,ACCTGGAG,0,D,1,0,40 -exampleBAM.bam,45,CACAGCAA,0,D,1,0,40 -exampleBAM.bam,20,GA,0,M,1,0,40 -exampleBAM.bam,45,AGGTGGAG,0,D,1,0,40 -exampleBAM.bam,45,GCAAAATC,0,I,1,0,40 -exampleBAM.bam,27,TA,0,M,4,0,40 -exampleBAM.bam,27,18,1,M,1,0,40 -exampleBAM.bam,32,CC,0,M,1,0,40 -exampleBAM.bam,45,AAAATCTA,0,I,1,0,40 -exampleBAM.bam,45,22,1,I,5,0,40 -exampleBAM.bam,45,26,1,D,5,0,40 -exampleBAM.bam,33,76,1,M,1,0,40 -exampleBAM.bam,30,24,1,M,1,0,40 -exampleBAM.bam,45,TTCTATTC,0,D,1,0,40 -exampleBAM.bam,45,GTCAATGT,0,I,1,0,40 -exampleBAM.bam,21,73,1,M,1,0,40 -exampleBAM.bam,17,4,1,M,1,0,40 -exampleBAM.bam,8,17,1,M,1,0,40 -exampleBAM.bam,34,GA,0,M,1,0,40 -exampleBAM.bam,45,ATCGTGAG,0,I,1,0,40 -exampleBAM.bam,45,CCAGATCC,0,I,1,0,40 -exampleBAM.bam,45,GATCGTGA,0,D,1,0,40 -exampleBAM.bam,45,52,1,I,5,0,40 -exampleBAM.bam,45,56,1,D,5,0,40 -exampleBAM.bam,9,TC,0,M,1,0,40 -exampleBAM.bam,23,CT,0,M,2,0,40 -exampleBAM.bam,31,26,1,M,2,0,40 -exampleBAM.bam,45,ATGTGAAC,0,D,1,0,40 -exampleBAM.bam,45,ATTACTCT,0,I,1,0,40 -exampleBAM.bam,45,ACACAGCA,0,D,1,0,40 -exampleBAM.bam,26,TT,0,M,1,0,40 -exampleBAM.bam,45,GGGTTTGG,0,D,2,0,40 -exampleBAM.bam,33,8,1,M,1,0,40 -exampleBAM.bam,21,GT,0,M,2,0,40 -exampleBAM.bam,34,74,1,M,1,0,40 -exampleBAM.bam,45,ATTCTTAA,0,I,1,0,40 -exampleBAM.bam,45,GAGCCTTT,0,D,1,0,40 -exampleBAM.bam,20,GC,0,M,1,0,40 -exampleBAM.bam,45,GGTTAGGG,0,D,2,0,40 -exampleBAM.bam,33,42,1,M,1,0,40 -exampleBAM.bam,45,GTGCAAAG,0,I,1,0,40 -exampleBAM.bam,6,75,1,M,1,0,40 -exampleBAM.bam,27,TC,0,M,1,0,40 -exampleBAM.bam,32,CA,0,M,2,0,40 -exampleBAM.bam,29,60,1,M,1,0,40 -exampleBAM.bam,34,13,1,M,1,0,40 -exampleBAM.bam,34,GT,0,M,2,0,40 -exampleBAM.bam,21,74,1,M,1,0,40 -exampleBAM.bam,45,GTTAATGA,0,I,1,0,40 -exampleBAM.bam,45,TATTATTG,0,D,1,0,40 -exampleBAM.bam,24,52,1,M,1,0,40 -exampleBAM.bam,45,CTTTCAGG,0,I,1,0,40 -exampleBAM.bam,45,GACATGGT,0,D,1,0,40 -exampleBAM.bam,45,ATCATGGT,0,D,1,0,40 -exampleBAM.bam,45,21,1,I,5,0,40 -exampleBAM.bam,45,25,1,D,5,0,40 -exampleBAM.bam,34,47,1,M,1,0,40 -exampleBAM.bam,31,25,1,M,1,0,40 -exampleBAM.bam,19,71,1,M,1,0,40 -exampleBAM.bam,6,GG,0,M,4,1,6 -exampleBAM.bam,9,16,1,M,1,0,40 -exampleBAM.bam,45,TCCAGTTC,0,I,1,0,40 -exampleBAM.bam,45,TTCACATG,0,D,1,0,40 -exampleBAM.bam,45,TAAGTGAC,0,I,1,0,40 -exampleBAM.bam,45,GTGACATG,0,D,1,0,40 -exampleBAM.bam,45,55,1,I,5,0,40 -exampleBAM.bam,45,59,1,D,5,0,40 -exampleBAM.bam,45,CATGATCG,0,I,1,0,40 -exampleBAM.bam,16,AT,0,M,1,0,40 -exampleBAM.bam,32,43,1,M,3,0,40 -exampleBAM.bam,19,33,1,M,1,0,40 -exampleBAM.bam,21,GA,0,M,2,0,40 -exampleBAM.bam,45,GTATTTGC,0,D,1,0,40 -exampleBAM.bam,26,TA,0,M,1,0,40 -exampleBAM.bam,45,TCTTAAGT,0,D,1,0,40 -exampleBAM.bam,33,CC,0,M,1,0,40 -exampleBAM.bam,11,20,1,M,1,0,40 -exampleBAM.bam,28,61,1,M,1,0,40 -exampleBAM.bam,18,1,1,M,1,0,40 -exampleBAM.bam,45,ACCCAGAT,0,I,1,0,40 -exampleBAM.bam,45,AAAGACAC,0,I,1,0,40 -exampleBAM.bam,45,GCCTTTGC,0,D,1,0,40 -exampleBAM.bam,27,16,1,M,1,0,40 -exampleBAM.bam,27,TG,0,M,2,0,40 -exampleBAM.bam,32,CT,0,M,1,0,40 -exampleBAM.bam,21,44,1,M,1,0,40 -exampleBAM.bam,45,TATTACTC,0,I,1,0,40 -exampleBAM.bam,45,TGGGCTGG,0,I,1,0,40 -exampleBAM.bam,16,65,1,M,1,0,40 -exampleBAM.bam,34,GG,0,M,2,0,40 -exampleBAM.bam,25,21,1,M,1,0,40 -exampleBAM.bam,22,9,1,M,1,0,40 -exampleBAM.bam,45,CAGGCCAC,0,D,1,0,40 -exampleBAM.bam,45,20,1,I,5,0,40 -exampleBAM.bam,45,24,1,D,5,0,40 -exampleBAM.bam,30,26,1,M,1,0,40 -exampleBAM.bam,45,TTGTATTT,0,D,1,0,40 -exampleBAM.bam,24,53,1,M,1,0,40 -exampleBAM.bam,23,CC,0,M,1,0,40 -exampleBAM.bam,19,70,1,M,1,1,1 -exampleBAM.bam,25,55,1,M,1,0,40 -exampleBAM.bam,45,AGGCCACC,0,I,1,0,40 -exampleBAM.bam,45,54,1,I,5,0,40 -exampleBAM.bam,45,58,1,D,5,0,40 -exampleBAM.bam,45,ACTTTCAG,0,I,1,0,40 -exampleBAM.bam,45,AAAGTGCA,0,D,1,0,40 -exampleBAM.bam,45,ATTGATAT,0,D,1,0,40 -exampleBAM.bam,45,AATGTGAA,0,I,1,0,40 -exampleBAM.bam,9,TT,0,M,1,0,40 -exampleBAM.bam,19,32,1,M,1,0,40 -exampleBAM.bam,29,28,1,M,1,0,40 -exampleBAM.bam,45,CGGGTTTG,0,I,2,0,40 -exampleBAM.bam,45,TCTTTGTA,0,I,1,0,40 -exampleBAM.bam,33,10,1,M,1,0,40 -exampleBAM.bam,33,CA,0,M,2,0,40 -exampleBAM.bam,45,GTTCGGGT,0,I,3,0,40 -exampleBAM.bam,27,TT,0,M,2,0,40 -exampleBAM.bam,27,17,1,M,1,0,40 -exampleBAM.bam,45,CAGCAAAA,0,I,1,0,40 -exampleBAM.bam,45,GGCAGCCT,0,I,1,0,40 -exampleBAM.bam,20,GT,0,M,1,1,1 -exampleBAM.bam,45,TGGAGCCT,0,I,1,0,40 -exampleBAM.bam,45,TGGTGGCC,0,I,1,0,40 -exampleBAM.bam,28,30,1,M,1,0,40 -exampleBAM.bam,33,40,1,M,1,0,40 -exampleBAM.bam,24,TG,0,M,2,0,40 -exampleBAM.bam,45,TGTGTCTT,0,I,1,0,40 -exampleBAM.bam,45,TCAATAAT,0,I,1,0,40 -exampleBAM.bam,45,TCTCCAGG,0,I,1,0,40 -exampleBAM.bam,45,49,1,I,5,0,40 -exampleBAM.bam,45,61,1,D,5,0,40 -exampleBAM.bam,45,CCTCGTCC,0,D,1,0,40 -exampleBAM.bam,45,GGCACCCA,0,I,1,0,40 -exampleBAM.bam,22,44,1,M,2,0,40 -exampleBAM.bam,45,AGGTTATC,0,I,1,0,40 -exampleBAM.bam,34,41,1,M,1,0,40 -exampleBAM.bam,19,65,1,M,1,0,40 -exampleBAM.bam,23,12,1,M,1,0,40 -exampleBAM.bam,23,GG,0,M,1,0,40 -exampleBAM.bam,45,TTGGGTTC,0,I,1,0,40 -exampleBAM.bam,45,TTCTGTGT,0,D,1,0,40 -exampleBAM.bam,45,TGTTGGTT,0,I,1,0,40 -exampleBAM.bam,24,50,1,M,1,0,40 -exampleBAM.bam,45,GTTTCACA,0,I,1,0,40 -exampleBAM.bam,45,TCGGGTTC,0,I,1,0,40 -exampleBAM.bam,45,TAGGGTTC,0,I,1,0,40 -exampleBAM.bam,33,73,1,M,1,0,40 -exampleBAM.bam,9,52,1,M,1,0,40 -exampleBAM.bam,45,19,1,I,5,0,40 -exampleBAM.bam,45,31,1,D,5,0,40 -exampleBAM.bam,25,TA,0,M,3,0,40 -exampleBAM.bam,34,11,1,M,1,0,40 -exampleBAM.bam,34,CC,0,M,1,0,40 -exampleBAM.bam,28,25,1,M,1,0,40 -exampleBAM.bam,45,TAGATTTT,0,I,1,0,40 -exampleBAM.bam,45,GGTTGGGG,0,I,2,0,40 -exampleBAM.bam,45,GGCTGGGG,0,I,1,0,40 -exampleBAM.bam,45,GATTAGAT,0,I,1,0,40 -exampleBAM.bam,5,GG,0,M,3,1,5 -exampleBAM.bam,32,15,1,M,1,0,40 -exampleBAM.bam,27,22,1,M,1,0,40 -exampleBAM.bam,21,42,1,M,1,0,40 -exampleBAM.bam,19,5,1,M,1,0,40 -exampleBAM.bam,19,AT,0,M,1,0,40 -exampleBAM.bam,45,TTTCAGGC,0,D,1,0,40 -exampleBAM.bam,45,TGCCAGGC,0,D,1,0,40 -exampleBAM.bam,45,GTCTTTAT,0,I,1,0,40 -exampleBAM.bam,45,TGAACTGG,0,I,1,0,40 -exampleBAM.bam,26,20,1,M,1,0,40 -exampleBAM.bam,45,TATTCTTA,0,D,1,0,40 -exampleBAM.bam,45,TGATAACC,0,D,1,0,40 -exampleBAM.bam,45,ATTTTTCT,0,D,1,0,40 -exampleBAM.bam,45,GGCTTTAT,0,I,1,0,40 -exampleBAM.bam,5,46,1,M,1,1,1 -exampleBAM.bam,29,27,1,M,1,0,40 -exampleBAM.bam,45,ATCCATTT,0,D,1,0,40 -exampleBAM.bam,45,48,1,I,5,0,40 -exampleBAM.bam,45,60,1,D,5,0,40 -exampleBAM.bam,45,GATCCAGT,0,I,1,0,40 -exampleBAM.bam,45,AATGAGTC,0,D,1,0,40 -exampleBAM.bam,24,TT,0,M,3,1,5 -exampleBAM.bam,45,TCTTTATA,0,I,1,0,40 -exampleBAM.bam,6,CC,0,M,1,0,40 -exampleBAM.bam,23,GT,0,M,2,0,40 -exampleBAM.bam,34,40,1,M,1,0,40 -exampleBAM.bam,45,18,1,I,5,0,40 -exampleBAM.bam,45,30,1,D,5,0,40 -exampleBAM.bam,45,CAAAATCT,0,I,1,0,40 -exampleBAM.bam,22,15,1,M,1,0,40 -exampleBAM.bam,45,CCAGGTTA,0,I,1,0,40 -exampleBAM.bam,45,TCATGGTG,0,I,1,0,40 -exampleBAM.bam,45,TCTAATCT,0,I,1,0,40 -exampleBAM.bam,45,TTGGGTTA,0,I,1,0,40 -exampleBAM.bam,45,TAGGGTTA,0,I,1,0,40 -exampleBAM.bam,45,GTTGGTTA,0,I,1,0,40 -exampleBAM.bam,33,72,1,M,1,0,40 -exampleBAM.bam,31,60,1,M,1,0,40 -exampleBAM.bam,34,CA,0,M,4,0,40 -exampleBAM.bam,45,CCCAGATC,0,D,1,0,40 -exampleBAM.bam,18,36,1,M,1,0,40 -exampleBAM.bam,16,70,1,M,1,0,40 -exampleBAM.bam,45,TGTATTTG,0,I,1,0,40 -exampleBAM.bam,33,46,1,M,1,0,40 -exampleBAM.bam,45,GGTTGGGT,0,I,1,0,40 -exampleBAM.bam,45,GTTTGGGT,0,I,1,0,40 -exampleBAM.bam,45,TTCTAGAG,0,I,1,0,40 -exampleBAM.bam,19,AG,0,M,1,0,40 -exampleBAM.bam,32,GA,0,M,2,0,40 -exampleBAM.bam,32,14,1,M,2,0,40 -exampleBAM.bam,12,62,1,M,1,0,40 -exampleBAM.bam,33,12,1,M,1,0,40 -exampleBAM.bam,45,GGTGGCCT,0,I,1,0,40 -exampleBAM.bam,4,GC,0,M,1,0,40 -exampleBAM.bam,27,53,1,M,2,0,40 -exampleBAM.bam,23,GA,0,M,1,0,40 -exampleBAM.bam,45,TTATTATT,0,I,1,0,40 -exampleBAM.bam,5,74,1,M,1,0,40 -exampleBAM.bam,45,ATGATAAC,0,I,1,0,40 -exampleBAM.bam,45,51,1,I,5,0,40 -exampleBAM.bam,45,63,1,D,5,0,40 -exampleBAM.bam,45,CACCCAGA,0,I,1,0,40 -exampleBAM.bam,45,CGTGAGTG,0,D,1,0,40 -exampleBAM.bam,45,GCTTTATT,0,I,1,0,40 -exampleBAM.bam,45,ATGGTGGC,0,D,1,0,40 -exampleBAM.bam,34,CT,0,M,2,0,40 -exampleBAM.bam,4,72,1,M,1,0,40 -exampleBAM.bam,45,TCGGGTTT,0,I,2,0,40 -exampleBAM.bam,24,48,1,M,1,0,40 -exampleBAM.bam,45,TCCATGAT,0,I,1,0,40 -exampleBAM.bam,45,CACATGAT,0,I,1,0,40 -exampleBAM.bam,45,17,1,I,5,0,40 -exampleBAM.bam,45,29,1,D,5,0,40 -exampleBAM.bam,45,ATCAATAA,0,D,1,0,40 -exampleBAM.bam,45,ACCATGAT,0,I,1,0,40 -exampleBAM.bam,32,GT,0,M,6,0,40 -exampleBAM.bam,19,7,1,M,1,0,40 -exampleBAM.bam,33,45,1,M,1,0,40 -exampleBAM.bam,28,27,1,M,1,0,40 -exampleBAM.bam,45,TCCATTTC,0,I,1,0,40 -exampleBAM.bam,45,GATAACCT,0,D,1,0,40 -exampleBAM.bam,45,AACTGGGA,0,I,1,0,40 -exampleBAM.bam,4,GG,0,M,1,0,40 -exampleBAM.bam,33,GC,0,M,1,0,40 -exampleBAM.bam,45,TCAGGCCA,0,I,1,0,40 -exampleBAM.bam,45,TTGCACTT,0,I,1,0,40 -exampleBAM.bam,45,TTCACTGA,0,I,1,0,40 -exampleBAM.bam,45,CTCCAGGT,0,D,1,0,40 -exampleBAM.bam,6,CT,0,M,1,0,40 -exampleBAM.bam,23,15,1,M,1,0,40 -exampleBAM.bam,25,51,1,M,1,0,40 -exampleBAM.bam,32,72,1,M,1,0,40 -exampleBAM.bam,34,42,1,M,1,0,40 -exampleBAM.bam,45,GATATAAA,0,I,1,0,40 -exampleBAM.bam,45,CTAGAGTT,0,D,1,0,40 -exampleBAM.bam,45,50,1,I,5,0,40 -exampleBAM.bam,45,62,1,D,5,0,40 -exampleBAM.bam,45,GCCACCAT,0,D,1,0,40 -exampleBAM.bam,45,GGGTTCGG,0,D,3,0,40 -exampleBAM.bam,24,TC,0,M,3,0,40 -exampleBAM.bam,25,TT,0,M,2,0,40 -exampleBAM.bam,45,16,1,I,5,0,40 -exampleBAM.bam,45,28,1,D,5,0,40 -exampleBAM.bam,45,ACATGGTA,0,I,1,0,40 -exampleBAM.bam,16,34,1,M,1,1,1 -exampleBAM.bam,45,AATCTCCA,0,D,1,0,40 -exampleBAM.bam,45,ATTTCACT,0,I,1,0,40 -exampleBAM.bam,22,GT,0,M,2,0,40 -exampleBAM.bam,45,ATATCAAT,0,D,1,0,40 -exampleBAM.bam,45,CAATGTGA,0,D,1,0,40 -exampleBAM.bam,45,GAGTCAAT,0,D,1,0,40 -exampleBAM.bam,24,49,1,M,1,0,40 -exampleBAM.bam,45,GGGGGTTG,0,I,1,0,40 -exampleBAM.bam,45,TAGGGTTG,0,I,1,0,40 -exampleBAM.bam,45,TGCAATCC,0,I,1,0,40 -exampleBAM.bam,45,TGGGGTTG,0,I,1,0,40 -exampleBAM.bam,45,TTAATGAG,0,I,1,0,40 -exampleBAM.bam,30,30,1,M,1,0,40 -exampleBAM.bam,23,75,1,M,1,0,40 -exampleBAM.bam,32,GG,0,M,5,0,40 -exampleBAM.bam,20,9,1,M,1,0,40 -exampleBAM.bam,20,CT,0,M,1,0,40 -exampleBAM.bam,45,ATTAGATT,0,D,1,0,40 -exampleBAM.bam,33,44,1,M,1,0,40 -exampleBAM.bam,45,TTTCTGTG,0,I,1,0,40 -exampleBAM.bam,45,TGGAGATT,0,D,1,0,40 -exampleBAM.bam,45,GTTTGGGC,0,I,1,0,40 -exampleBAM.bam,21,11,1,M,1,0,40 -exampleBAM.bam,29,24,1,M,1,0,40 -exampleBAM.bam,32,46,1,M,1,0,40 -exampleBAM.bam,27,55,1,M,1,0,40 -exampleBAM.bam,45,ATATAAAG,0,I,1,0,40 -exampleBAM.bam,45,GAGTTTCA,0,D,1,0,40 -exampleBAM.bam,45,CACTTTCA,0,D,1,0,40 -exampleBAM.bam,45,CCATTTCA,0,D,1,0,40 -exampleBAM.bam,45,CCAGGCAC,0,D,1,0,40 -exampleBAM.bam,11,TT,0,M,1,1,1 -exampleBAM.bam,45,TTTCACTG,0,I,1,0,40 -exampleBAM.bam,33,GA,0,M,1,0,40 -exampleBAM.bam,45,TCGTGAGT,0,I,1,0,40 -exampleBAM.bam,45,TACTCTTT,0,D,1,0,40 -exampleBAM.bam,45,TAATGAGT,0,I,1,0,40 -exampleBAM.bam,45,GTGTCTTT,0,D,1,0,40 -exampleBAM.bam,45,GGCTTTAT,0,D,1,0,40 -exampleBAM.bam,22,70,1,M,1,0,40 -exampleBAM.bam,45,ATTTTTCT,0,I,1,0,40 -exampleBAM.bam,45,TGCCAGGC,0,I,1,0,40 -exampleBAM.bam,33,1,1,M,2,0,40 -exampleBAM.bam,45,TTTCAGGC,0,I,1,0,40 -exampleBAM.bam,45,TATTCTTA,0,I,1,0,40 -exampleBAM.bam,45,TGATAACC,0,I,1,0,40 -exampleBAM.bam,45,GTCTTTAT,0,D,1,0,40 -exampleBAM.bam,45,TGAACTGG,0,D,1,0,40 -exampleBAM.bam,21,AG,0,M,2,0,40 -exampleBAM.bam,32,33,1,M,2,0,40 -exampleBAM.bam,27,56,1,M,1,0,40 -exampleBAM.bam,45,GGCTGGGG,0,D,1,0,40 -exampleBAM.bam,45,GATTAGAT,0,D,1,0,40 -exampleBAM.bam,33,35,1,M,1,0,40 -exampleBAM.bam,45,TAGATTTT,0,D,1,0,40 -exampleBAM.bam,45,GGTTGGGG,0,D,2,0,40 -exampleBAM.bam,19,CT,0,M,2,1,3 -exampleBAM.bam,45,19,1,D,5,0,40 -exampleBAM.bam,45,31,1,I,5,0,40 -exampleBAM.bam,45,TGTTGGTT,0,D,1,0,40 -exampleBAM.bam,45,TTCTGTGT,0,I,1,0,40 -exampleBAM.bam,24,62,1,M,1,0,40 -exampleBAM.bam,45,TCGGGTTC,0,D,1,0,40 -exampleBAM.bam,45,GTTTCACA,0,D,1,0,40 -exampleBAM.bam,45,TAGGGTTC,0,D,1,0,40 -exampleBAM.bam,45,TTGGGTTC,0,D,1,0,40 -exampleBAM.bam,30,TT,0,M,2,0,40 -exampleBAM.bam,30,17,1,M,2,0,40 -exampleBAM.bam,33,69,1,M,1,0,40 -exampleBAM.bam,6,36,1,M,1,0,40 -exampleBAM.bam,17,GT,0,M,1,0,40 -exampleBAM.bam,21,64,1,M,1,0,40 -exampleBAM.bam,34,AC,0,M,1,0,40 -exampleBAM.bam,16,GC,0,M,1,0,40 -exampleBAM.bam,45,CCTCGTCC,0,I,1,0,40 -exampleBAM.bam,45,49,1,D,5,0,40 -exampleBAM.bam,45,61,1,I,5,0,40 -exampleBAM.bam,45,AGGTTATC,0,D,1,0,40 -exampleBAM.bam,45,GGCACCCA,0,D,1,0,40 -exampleBAM.bam,45,TGTGTCTT,0,D,1,0,40 -exampleBAM.bam,45,TCAATAAT,0,D,1,0,40 -exampleBAM.bam,45,TCTCCAGG,0,D,1,0,40 -exampleBAM.bam,6,AA,0,M,2,0,40 -exampleBAM.bam,31,TC,0,M,1,0,40 -exampleBAM.bam,31,19,1,M,1,0,40 -exampleBAM.bam,8,58,1,M,1,0,40 -exampleBAM.bam,28,54,1,M,1,0,40 -exampleBAM.bam,45,GGTGGCCT,0,D,1,0,40 -exampleBAM.bam,18,10,1,M,1,0,40 -exampleBAM.bam,18,CA,0,M,2,0,40 -exampleBAM.bam,27,57,1,M,1,0,40 -exampleBAM.bam,21,AT,0,M,1,0,40 -exampleBAM.bam,45,TGTATTTG,0,D,1,0,40 -exampleBAM.bam,45,TTCTAGAG,0,D,1,0,40 -exampleBAM.bam,45,GGTTGGGT,0,D,1,0,40 -exampleBAM.bam,45,GTTTGGGT,0,D,1,0,40 -exampleBAM.bam,13,TA,0,M,1,0,40 -exampleBAM.bam,20,AC,0,M,1,0,40 -exampleBAM.bam,45,CCCAGATC,0,I,1,0,40 -exampleBAM.bam,32,2,1,M,2,0,40 -exampleBAM.bam,27,27,1,M,1,0,40 -exampleBAM.bam,6,67,1,M,1,0,40 -exampleBAM.bam,45,TAGGGTTA,0,D,1,0,40 -exampleBAM.bam,45,GTTGGTTA,0,D,1,0,40 -exampleBAM.bam,45,TCATGGTG,0,D,1,0,40 -exampleBAM.bam,45,TCTAATCT,0,D,1,0,40 -exampleBAM.bam,45,TTGGGTTA,0,D,1,0,40 -exampleBAM.bam,30,TG,0,M,1,0,40 -exampleBAM.bam,45,18,1,D,5,0,40 -exampleBAM.bam,45,30,1,I,5,0,40 -exampleBAM.bam,45,CCAGGTTA,0,D,1,0,40 -exampleBAM.bam,45,CAAAATCT,0,D,1,0,40 -exampleBAM.bam,25,31,1,M,1,0,40 -exampleBAM.bam,34,6,1,M,1,0,40 -exampleBAM.bam,34,AA,0,M,1,0,40 -exampleBAM.bam,17,GG,0,M,1,0,40 -exampleBAM.bam,23,35,1,M,1,0,40 -exampleBAM.bam,45,TCTTTATA,0,D,1,0,40 -exampleBAM.bam,45,GATCCAGT,0,D,1,0,40 -exampleBAM.bam,45,48,1,D,5,0,40 -exampleBAM.bam,45,60,1,I,5,0,40 -exampleBAM.bam,45,ATCCATTT,0,I,1,0,40 -exampleBAM.bam,45,AATGAGTC,0,I,1,0,40 -exampleBAM.bam,31,TA,0,M,2,0,40 -exampleBAM.bam,21,AA,0,M,1,0,40 -exampleBAM.bam,34,65,1,M,1,0,40 -exampleBAM.bam,45,CTCCAGGT,0,I,1,0,40 -exampleBAM.bam,18,CT,0,M,1,0,40 -exampleBAM.bam,33,3,1,M,1,0,40 -exampleBAM.bam,45,TCAGGCCA,0,D,1,0,40 -exampleBAM.bam,45,TTGCACTT,0,D,1,0,40 -exampleBAM.bam,28,53,1,M,1,0,40 -exampleBAM.bam,45,TTCACTGA,0,D,1,0,40 -exampleBAM.bam,19,CC,0,M,1,0,40 -exampleBAM.bam,32,1,1,M,1,0,40 -exampleBAM.bam,45,GATAACCT,0,I,1,0,40 -exampleBAM.bam,45,AACTGGGA,0,D,1,0,40 -exampleBAM.bam,16,73,1,M,1,0,40 -exampleBAM.bam,45,TCCATTTC,0,D,1,0,40 -exampleBAM.bam,21,66,1,M,1,0,40 -exampleBAM.bam,34,5,1,M,1,0,40 -exampleBAM.bam,34,AT,0,M,6,0,40 -exampleBAM.bam,16,47,1,M,1,0,40 -exampleBAM.bam,45,CACATGAT,0,D,1,0,40 -exampleBAM.bam,45,17,1,D,5,0,40 -exampleBAM.bam,45,29,1,I,5,0,40 -exampleBAM.bam,45,ATCAATAA,0,I,1,0,40 -exampleBAM.bam,45,ACCATGAT,0,D,1,0,40 -exampleBAM.bam,45,TCGGGTTT,0,D,2,0,40 -exampleBAM.bam,45,TCCATGAT,0,D,1,0,40 -exampleBAM.bam,6,AG,0,M,1,1,1 -exampleBAM.bam,6,4,1,M,1,0,40 -exampleBAM.bam,31,TT,0,M,1,0,40 -exampleBAM.bam,45,ATGATAAC,0,D,1,0,40 -exampleBAM.bam,45,51,1,D,5,0,40 -exampleBAM.bam,45,63,1,I,5,0,40 -exampleBAM.bam,45,CGTGAGTG,0,I,1,0,40 -exampleBAM.bam,45,CACCCAGA,0,D,1,0,40 -exampleBAM.bam,16,GT,0,M,1,0,40 -exampleBAM.bam,5,70,1,M,1,0,40 -exampleBAM.bam,45,GCTTTATT,0,D,1,0,40 -exampleBAM.bam,45,ATGGTGGC,0,I,1,0,40 -exampleBAM.bam,45,TTATTATT,0,D,1,0,40 -exampleBAM.bam,34,64,1,M,1,0,40 -exampleBAM.bam,21,AC,0,M,3,0,40 -exampleBAM.bam,33,2,1,M,1,0,40 -exampleBAM.bam,45,TTTCACTG,0,D,1,0,40 -exampleBAM.bam,45,TCGTGAGT,0,D,1,0,40 -exampleBAM.bam,45,GTGTCTTT,0,I,1,0,40 -exampleBAM.bam,45,TAATGAGT,0,D,1,0,40 -exampleBAM.bam,45,TACTCTTT,0,I,1,0,40 -exampleBAM.bam,45,CACTTTCA,0,I,1,0,40 -exampleBAM.bam,45,CCATTTCA,0,I,1,0,40 -exampleBAM.bam,45,ATATAAAG,0,D,1,0,40 -exampleBAM.bam,45,GAGTTTCA,0,I,1,0,40 -exampleBAM.bam,45,CCAGGCAC,0,I,1,0,40 -exampleBAM.bam,29,54,1,M,1,0,40 -exampleBAM.bam,6,65,1,M,1,0,40 -exampleBAM.bam,19,10,1,M,1,0,40 -exampleBAM.bam,19,CA,0,M,2,0,40 -exampleBAM.bam,45,TTTCTGTG,0,D,1,0,40 -exampleBAM.bam,33,32,1,M,1,0,40 -exampleBAM.bam,45,GTTTGGGC,0,D,1,0,40 -exampleBAM.bam,45,TGGAGATT,0,I,1,0,40 -exampleBAM.bam,45,ATTAGATT,0,I,1,0,40 -exampleBAM.bam,34,4,1,M,1,0,40 -exampleBAM.bam,21,67,1,M,1,0,40 -exampleBAM.bam,45,TGGGGTTG,0,D,1,0,40 -exampleBAM.bam,45,TGCAATCC,0,D,1,0,40 -exampleBAM.bam,45,GGGGGTTG,0,D,1,0,40 -exampleBAM.bam,45,TAGGGTTG,0,D,1,0,40 -exampleBAM.bam,45,TTAATGAG,0,D,1,0,40 -exampleBAM.bam,30,18,1,M,1,0,40 -exampleBAM.bam,30,TA,0,M,4,0,40 -exampleBAM.bam,45,16,1,D,5,0,40 -exampleBAM.bam,45,28,1,I,5,0,40 -exampleBAM.bam,45,ACATGGTA,0,D,1,0,40 -exampleBAM.bam,45,GAGTCAAT,0,I,1,0,40 -exampleBAM.bam,45,CAATGTGA,0,I,1,0,40 -exampleBAM.bam,45,AATCTCCA,0,I,1,0,40 -exampleBAM.bam,45,ATTTCACT,0,D,1,0,40 -exampleBAM.bam,45,ATATCAAT,0,I,1,0,40 -exampleBAM.bam,8,57,1,M,1,1,1 -exampleBAM.bam,34,38,1,M,1,0,40 -exampleBAM.bam,31,16,1,M,1,0,40 -exampleBAM.bam,31,TG,0,M,3,0,40 -exampleBAM.bam,45,GGGTTCGG,0,I,3,0,40 -exampleBAM.bam,45,CTAGAGTT,0,I,1,0,40 -exampleBAM.bam,45,50,1,D,5,0,40 -exampleBAM.bam,45,62,1,I,5,0,40 -exampleBAM.bam,45,GATATAAA,0,D,1,0,40 -exampleBAM.bam,45,GCCACCAT,0,I,1,0,40 -exampleBAM.bam,45,ACCTGGAG,0,I,1,0,40 -exampleBAM.bam,5,AG,0,M,1,0,40 -exampleBAM.bam,45,AGGTGGAG,0,I,1,0,40 -exampleBAM.bam,45,GCAAAATC,0,D,1,0,40 -exampleBAM.bam,45,CACAGCAA,0,I,1,0,40 -exampleBAM.bam,28,TT,0,M,1,0,40 -exampleBAM.bam,33,39,1,M,1,0,40 -exampleBAM.bam,19,GT,0,M,1,0,40 -exampleBAM.bam,23,64,1,M,2,0,40 -exampleBAM.bam,27,30,1,M,1,0,40 -exampleBAM.bam,32,AC,0,M,1,0,40 -exampleBAM.bam,45,AAGTGACA,0,D,1,0,40 -exampleBAM.bam,5,38,1,M,1,0,40 -exampleBAM.bam,45,AGAGTTTC,0,D,1,0,40 -exampleBAM.bam,45,AGTGACAT,0,D,1,0,40 -exampleBAM.bam,45,GCCTGAAA,0,I,1,0,40 -exampleBAM.bam,45,CTCTTTGT,0,I,1,0,40 -exampleBAM.bam,33,AT,0,M,2,0,40 -exampleBAM.bam,45,TGGCAGCC,0,I,1,0,40 -exampleBAM.bam,4,AA,0,M,1,0,40 -exampleBAM.bam,29,TC,0,M,1,0,40 -exampleBAM.bam,34,71,1,M,1,0,40 -exampleBAM.bam,45,AGTTTCAC,0,D,1,0,40 -exampleBAM.bam,45,CATTTCAC,0,D,1,0,40 -exampleBAM.bam,45,53,1,D,5,0,40 -exampleBAM.bam,45,57,1,I,5,0,40 -exampleBAM.bam,45,CATGATAA,0,I,1,0,40 -exampleBAM.bam,45,TAGAGTTT,0,D,1,0,40 -exampleBAM.bam,45,GGTTCGGG,0,D,3,0,40 -exampleBAM.bam,45,CTTTATTA,0,I,1,0,40 -exampleBAM.bam,45,CTTTGTAT,0,D,1,0,40 -exampleBAM.bam,45,AGCCTCGT,0,I,1,0,40 -exampleBAM.bam,45,CTGTGTCT,0,I,1,0,40 -exampleBAM.bam,45,CTTAAGTG,0,I,1,0,40 -exampleBAM.bam,45,ATTCTATT,0,D,1,0,40 -exampleBAM.bam,45,CTAATCTC,0,D,1,0,40 -exampleBAM.bam,45,23,1,D,5,0,40 -exampleBAM.bam,45,27,1,I,5,0,40 -exampleBAM.bam,30,21,1,M,1,0,40 -exampleBAM.bam,45,TGAAAGTG,0,I,1,0,40 -exampleBAM.bam,45,TGGTATTA,0,I,1,0,40 -exampleBAM.bam,23,38,1,M,1,0,40 -exampleBAM.bam,34,3,1,M,1,0,40 -exampleBAM.bam,45,GGTTAGGG,0,I,2,0,40 -exampleBAM.bam,45,GTGCAAAG,0,D,1,0,40 -exampleBAM.bam,28,TG,0,M,3,0,40 -exampleBAM.bam,45,ATTCTTAA,0,D,1,0,40 -exampleBAM.bam,45,GAGCCTTT,0,I,1,0,40 -exampleBAM.bam,27,31,1,M,1,0,40 -exampleBAM.bam,29,48,1,M,1,0,40 -exampleBAM.bam,32,AA,0,M,1,0,40 -exampleBAM.bam,19,GG,0,M,2,0,40 -exampleBAM.bam,4,37,1,M,1,0,40 -exampleBAM.bam,45,GGGTTTGG,0,I,2,0,40 -exampleBAM.bam,33,AG,0,M,3,0,40 -exampleBAM.bam,28,50,1,M,1,0,40 -exampleBAM.bam,45,ATTACTCT,0,D,1,0,40 -exampleBAM.bam,45,ACACAGCA,0,I,1,0,40 -exampleBAM.bam,45,ATGTGAAC,0,I,1,0,40 -exampleBAM.bam,32,36,1,M,2,0,40 -exampleBAM.bam,29,TA,0,M,2,0,40 -exampleBAM.bam,34,70,1,M,1,0,40 -exampleBAM.bam,17,76,1,M,1,1,1 -exampleBAM.bam,30,54,1,M,1,0,40 -exampleBAM.bam,24,25,1,M,1,0,40 -exampleBAM.bam,45,ATCGTGAG,0,D,1,0,40 -exampleBAM.bam,45,GATCGTGA,0,I,1,0,40 -exampleBAM.bam,45,52,1,D,5,0,40 -exampleBAM.bam,45,56,1,I,5,0,40 -exampleBAM.bam,45,CCAGATCC,0,D,1,0,40 -exampleBAM.bam,16,CA,0,M,1,0,40 -exampleBAM.bam,8,63,1,M,1,0,40 -exampleBAM.bam,14,TG,0,M,1,0,40 -exampleBAM.bam,23,AT,0,M,3,0,40 -exampleBAM.bam,19,72,1,M,1,0,40 -exampleBAM.bam,30,20,1,M,1,0,40 -exampleBAM.bam,45,TTCTATTC,0,I,1,0,40 -exampleBAM.bam,45,GTCAATGT,0,D,1,0,40 -exampleBAM.bam,45,AAAATCTA,0,D,1,0,40 -exampleBAM.bam,45,22,1,D,5,0,40 -exampleBAM.bam,45,26,1,I,5,0,40 -exampleBAM.bam,34,2,1,M,1,0,40 -exampleBAM.bam,19,GC,0,M,1,0,40 -exampleBAM.bam,6,68,1,M,1,1,1 -exampleBAM.bam,23,66,1,M,1,0,40 -exampleBAM.bam,27,28,1,M,1,0,40 -exampleBAM.bam,32,AT,0,M,2,0,40 -exampleBAM.bam,5,AA,0,M,1,0,40 -exampleBAM.bam,45,TATTACTC,0,D,1,0,40 -exampleBAM.bam,33,37,1,M,1,0,40 -exampleBAM.bam,45,TGGGCTGG,0,D,1,0,40 -exampleBAM.bam,28,TC,0,M,1,0,40 -exampleBAM.bam,4,AG,0,M,1,0,40 -exampleBAM.bam,29,TT,0,M,2,0,40 -exampleBAM.bam,18,GT,0,M,1,0,40 -exampleBAM.bam,45,AAAGACAC,0,D,1,0,40 -exampleBAM.bam,45,GCCTTTGC,0,I,1,0,40 -exampleBAM.bam,45,ACCCAGAT,0,D,1,0,40 -exampleBAM.bam,45,TCTTAAGT,0,I,1,0,40 -exampleBAM.bam,13,55,1,M,1,0,40 -exampleBAM.bam,45,GTATTTGC,0,I,1,0,40 -exampleBAM.bam,33,7,1,M,1,0,40 -exampleBAM.bam,33,AC,0,M,1,0,40 -exampleBAM.bam,23,AA,0,M,1,0,40 -exampleBAM.bam,8,60,1,M,1,0,40 -exampleBAM.bam,22,38,1,M,1,0,40 -exampleBAM.bam,45,CATGATCG,0,D,1,0,40 -exampleBAM.bam,45,55,1,D,5,0,40 -exampleBAM.bam,45,59,1,I,5,0,40 -exampleBAM.bam,45,TCCAGTTC,0,D,1,0,40 -exampleBAM.bam,45,GTGACATG,0,I,1,0,40 -exampleBAM.bam,45,TTCACATG,0,I,1,0,40 -exampleBAM.bam,45,TAAGTGAC,0,D,1,0,40 -exampleBAM.bam,4,64,1,M,1,1,1 -exampleBAM.bam,25,24,1,M,1,0,40 -exampleBAM.bam,22,AG,0,M,2,0,40 -exampleBAM.bam,45,CTTTCAGG,0,D,1,0,40 -exampleBAM.bam,45,ATCATGGT,0,I,1,0,40 -exampleBAM.bam,45,21,1,D,5,0,40 -exampleBAM.bam,45,25,1,I,5,0,40 -exampleBAM.bam,45,GACATGGT,0,I,1,0,40 -exampleBAM.bam,30,23,1,M,1,0,40 -exampleBAM.bam,33,67,1,M,1,0,40 -exampleBAM.bam,24,56,1,M,1,0,40 -exampleBAM.bam,45,TATTATTG,0,I,1,0,40 -exampleBAM.bam,45,GTTAATGA,0,D,1,0,40 -exampleBAM.bam,32,AG,0,M,1,0,40 -exampleBAM.bam,23,67,1,M,1,0,40 -exampleBAM.bam,45,TGGAGCCT,0,D,1,0,40 -exampleBAM.bam,45,TGGTGGCC,0,D,1,0,40 -exampleBAM.bam,28,TA,0,M,1,0,40 -exampleBAM.bam,45,CAGCAAAA,0,D,1,0,40 -exampleBAM.bam,45,GGCAGCCT,0,D,1,0,40 -exampleBAM.bam,34,68,1,M,1,0,40 -exampleBAM.bam,21,3,1,M,1,0,40 -exampleBAM.bam,45,TCTTTGTA,0,D,1,0,40 -exampleBAM.bam,45,GTTCGGGT,0,D,3,0,40 -exampleBAM.bam,28,48,1,M,1,0,40 -exampleBAM.bam,33,AA,0,M,1,0,40 -exampleBAM.bam,18,GG,0,M,1,0,40 -exampleBAM.bam,45,CGGGTTTG,0,D,2,0,40 -exampleBAM.bam,34,34,1,M,1,0,40 -exampleBAM.bam,23,AC,0,M,1,0,40 -exampleBAM.bam,30,52,1,M,1,0,40 -exampleBAM.bam,24,27,1,M,1,0,40 -exampleBAM.bam,45,AGGCCACC,0,D,1,0,40 -exampleBAM.bam,20,69,1,M,1,0,40 -exampleBAM.bam,45,AAAGTGCA,0,I,1,0,40 -exampleBAM.bam,45,ATTGATAT,0,I,1,0,40 -exampleBAM.bam,45,AATGTGAA,0,D,1,0,40 -exampleBAM.bam,45,54,1,D,5,0,40 -exampleBAM.bam,45,58,1,I,5,0,40 -exampleBAM.bam,45,ACTTTCAG,0,D,1,0,40 -exampleBAM.bam,23,37,1,M,1,0,40 -exampleBAM.bam,21,71,1,M,1,0,40 -exampleBAM.bam,33,66,1,M,1,0,40 -exampleBAM.bam,15,TG,0,M,1,0,40 -exampleBAM.bam,45,TTGTATTT,0,I,1,0,40 -exampleBAM.bam,45,20,1,D,5,0,40 -exampleBAM.bam,45,24,1,I,5,0,40 -exampleBAM.bam,45,CAGGCCAC,0,I,1,0,40 -exampleBAM.bam,23,59,1,M,1,0,40 -exampleBAM.bam,17,20,1,M,1,0,40 -exampleBAM.bam,30,CG,0,M,1,0,40 -exampleBAM.bam,45,TTGATATA,0,I,1,0,40 -exampleBAM.bam,45,TTCTTAAG,0,I,1,0,40 -exampleBAM.bam,15,14,1,M,1,0,40 -exampleBAM.bam,45,GAACTGGG,0,D,1,0,40 -exampleBAM.bam,45,6,1,I,5,0,40 -exampleBAM.bam,45,10,1,D,5,0,40 -exampleBAM.bam,45,GGGCTGGG,0,D,1,0,40 -exampleBAM.bam,31,10,1,M,1,0,40 -exampleBAM.bam,34,60,1,M,1,0,40 -exampleBAM.bam,25,37,1,M,1,0,40 -exampleBAM.bam,6,31,1,M,1,1,1 -exampleBAM.bam,30,42,1,M,1,0,40 -exampleBAM.bam,45,GTTCTAGA,0,D,1,0,40 -exampleBAM.bam,45,TATTTGCA,0,D,1,0,40 -exampleBAM.bam,24,5,1,M,1,0,40 -exampleBAM.bam,45,CCTTTGCA,0,D,1,0,40 -exampleBAM.bam,45,CAGGCACC,0,I,1,0,40 -exampleBAM.bam,45,36,1,I,5,0,40 -exampleBAM.bam,45,40,1,D,5,0,40 -exampleBAM.bam,29,GA,0,M,2,0,40 -exampleBAM.bam,21,29,1,M,1,0,40 -exampleBAM.bam,45,TAATCTCC,0,I,1,0,40 -exampleBAM.bam,15,74,1,M,1,0,40 -exampleBAM.bam,45,TTGGGGGT,0,I,1,0,40 -exampleBAM.bam,33,24,1,M,1,0,40 -exampleBAM.bam,45,GTTGGGGT,0,I,1,0,40 -exampleBAM.bam,45,GCTGGGGT,0,I,1,0,40 -exampleBAM.bam,45,66,1,I,5,0,40 -exampleBAM.bam,45,CTTGGCTT,0,D,1,0,40 -exampleBAM.bam,45,GGCCACCA,0,D,1,0,40 -exampleBAM.bam,19,TG,0,M,2,0,40 -exampleBAM.bam,45,TTCAGGCC,0,I,1,0,40 -exampleBAM.bam,45,GGTTAATG,0,I,1,0,40 -exampleBAM.bam,45,GGTGGAGC,0,I,1,0,40 -exampleBAM.bam,28,GG,0,M,3,0,40 -exampleBAM.bam,45,GAGATTAG,0,I,1,0,40 -exampleBAM.bam,45,7,1,I,5,0,40 -exampleBAM.bam,45,11,1,D,5,0,40 -exampleBAM.bam,45,TTACTCTT,0,I,1,0,40 -exampleBAM.bam,30,9,1,M,1,0,40 -exampleBAM.bam,45,TTTATATC,0,I,1,0,40 -exampleBAM.bam,45,TGGTTAAT,0,I,1,0,40 -exampleBAM.bam,45,GTATTACT,0,D,1,0,40 -exampleBAM.bam,31,11,1,M,1,0,40 -exampleBAM.bam,31,CC,0,M,1,0,40 -exampleBAM.bam,34,61,1,M,1,0,40 -exampleBAM.bam,25,36,1,M,1,0,40 -exampleBAM.bam,45,ACAGCAAA,0,D,1,0,40 -exampleBAM.bam,45,AGTGCAAA,0,D,1,0,40 -exampleBAM.bam,45,37,1,I,5,0,40 -exampleBAM.bam,45,41,1,D,5,0,40 -exampleBAM.bam,45,TCCAGGTT,0,I,1,0,40 -exampleBAM.bam,45,GTGAGTGT,0,D,1,0,40 -exampleBAM.bam,45,TTATCATG,0,D,1,0,40 -exampleBAM.bam,24,AG,0,M,2,0,40 -exampleBAM.bam,29,GC,0,M,1,0,40 -exampleBAM.bam,32,57,1,M,1,0,40 -exampleBAM.bam,45,67,1,I,5,0,40 -exampleBAM.bam,18,19,1,M,1,0,40 -exampleBAM.bam,45,CTGGAGAT,0,I,1,0,40 -exampleBAM.bam,45,AGATTTTT,0,I,1,0,40 -exampleBAM.bam,45,AAATCTAA,0,D,1,0,40 -exampleBAM.bam,45,CTGAAAGT,0,D,1,0,40 -exampleBAM.bam,45,AGGCACCC,0,D,1,0,40 -exampleBAM.bam,45,TCTGTGTC,0,I,1,0,40 -exampleBAM.bam,45,TTGGGCTG,0,D,1,0,40 -exampleBAM.bam,28,47,1,M,1,0,40 -exampleBAM.bam,45,GTTGGGGG,0,I,1,0,40 -exampleBAM.bam,19,TT,0,M,2,0,40 -exampleBAM.bam,29,45,1,M,1,0,40 -exampleBAM.bam,45,CCTGGAGA,0,I,1,0,40 -exampleBAM.bam,45,ATGATTCT,0,D,1,0,40 -exampleBAM.bam,45,GCCAGGCA,0,I,1,0,40 -exampleBAM.bam,45,TTTATTAT,0,I,1,0,40 -exampleBAM.bam,33,59,1,M,1,0,40 -exampleBAM.bam,45,TCTATTCT,0,D,1,0,40 -exampleBAM.bam,45,TAACCTGG,0,I,1,0,40 -exampleBAM.bam,30,CA,0,M,3,0,40 -exampleBAM.bam,15,GG,0,M,2,0,40 -exampleBAM.bam,45,GACACAGC,0,I,1,0,40 -exampleBAM.bam,45,AACCTGGA,0,D,1,0,40 -exampleBAM.bam,45,4,1,I,5,0,40 -exampleBAM.bam,45,8,1,D,5,0,40 -exampleBAM.bam,25,AT,0,M,2,0,40 -exampleBAM.bam,6,63,1,M,2,0,40 -exampleBAM.bam,45,TTTGCAAT,0,D,1,0,40 -exampleBAM.bam,45,TTTGCACT,0,I,1,0,40 -exampleBAM.bam,45,TTAAGTGA,0,D,1,0,40 -exampleBAM.bam,45,TGAGTCAA,0,I,1,0,40 -exampleBAM.bam,22,59,1,M,1,0,40 -exampleBAM.bam,45,CTCGTCCA,0,D,1,0,40 -exampleBAM.bam,45,38,1,I,5,0,40 -exampleBAM.bam,45,42,1,D,5,0,40 -exampleBAM.bam,34,62,1,M,1,0,40 -exampleBAM.bam,31,CG,0,M,1,0,40 -exampleBAM.bam,31,8,1,M,2,0,40 -exampleBAM.bam,27,69,1,M,1,0,40 -exampleBAM.bam,26,3,1,M,1,0,40 -exampleBAM.bam,45,TATAAAGA,0,D,1,0,40 -exampleBAM.bam,45,GGGGTTGG,0,D,2,0,40 -exampleBAM.bam,45,64,1,I,5,0,40 -exampleBAM.bam,45,76,1,D,5,0,40 -exampleBAM.bam,45,GATTCTAT,0,D,1,0,40 -exampleBAM.bam,45,AGACACAG,0,I,1,0,40 -exampleBAM.bam,45,AGGGTTGG,0,D,1,0,40 -exampleBAM.bam,45,AGTGTTGG,0,D,1,0,40 -exampleBAM.bam,29,12,1,M,1,0,40 -exampleBAM.bam,29,GG,0,M,4,0,40 -exampleBAM.bam,8,71,1,M,1,0,40 -exampleBAM.bam,45,GTGAACTG,0,I,1,0,40 -exampleBAM.bam,45,TTGGCTTT,0,D,1,0,40 -exampleBAM.bam,9,69,1,M,1,0,40 -exampleBAM.bam,45,CCTGAAAG,0,I,1,0,40 -exampleBAM.bam,45,CTTTGCAC,0,D,1,0,40 -exampleBAM.bam,20,29,1,M,1,0,40 -exampleBAM.bam,12,40,1,M,1,0,40 -exampleBAM.bam,32,24,1,M,1,0,40 -exampleBAM.bam,21,61,1,M,1,0,40 -exampleBAM.bam,45,CATGGTAT,0,I,1,0,40 -exampleBAM.bam,45,GCACCCAG,0,D,1,0,40 -exampleBAM.bam,16,55,1,M,1,0,40 -exampleBAM.bam,45,ATGATCGT,0,D,1,0,40 -exampleBAM.bam,45,5,1,I,5,0,40 -exampleBAM.bam,45,9,1,D,5,0,40 -exampleBAM.bam,30,CC,0,M,2,0,40 -exampleBAM.bam,23,56,1,M,1,0,40 -exampleBAM.bam,6,62,1,M,1,0,40 -exampleBAM.bam,31,43,1,M,1,0,40 -exampleBAM.bam,25,AG,0,M,1,0,40 -exampleBAM.bam,45,ATAACCTG,0,D,1,0,40 -exampleBAM.bam,45,39,1,I,5,0,40 -exampleBAM.bam,45,43,1,D,5,0,40 -exampleBAM.bam,45,GAAAGTGC,0,D,1,0,40 -exampleBAM.bam,24,AA,0,M,1,0,40 -exampleBAM.bam,24,6,1,M,2,0,40 -exampleBAM.bam,45,TTATTGAT,0,I,1,0,40 -exampleBAM.bam,34,63,1,M,1,0,40 -exampleBAM.bam,31,CT,0,M,1,0,40 -exampleBAM.bam,45,65,1,I,5,0,40 -exampleBAM.bam,18,TT,0,M,1,1,1 -exampleBAM.bam,45,GATTTTTC,0,I,1,0,40 -exampleBAM.bam,45,AGTTCTAG,0,D,1,0,40 -exampleBAM.bam,45,TAAAGACA,0,I,1,0,40 -exampleBAM.bam,45,TGAGTGTT,0,I,1,0,40 -exampleBAM.bam,45,TTTCACAT,0,I,1,0,40 -exampleBAM.bam,45,GTGGAGCC,0,D,1,0,40 -exampleBAM.bam,19,49,1,M,1,0,40 -exampleBAM.bam,29,GT,0,M,2,0,40 -exampleBAM.bam,5,26,1,M,1,1,1 -exampleBAM.bam,45,AAGTGCAA,0,D,1,0,40 -exampleBAM.bam,45,ATTTGCAA,0,D,1,0,40 -exampleBAM.bam,45,ATCTAATC,0,I,1,0,40 -exampleBAM.bam,20,28,1,M,1,1,1 -exampleBAM.bam,45,GGTATTAC,0,I,1,0,40 -exampleBAM.bam,45,TGTGAACT,0,D,1,0,40 -exampleBAM.bam,45,TGGCCTGA,0,I,1,0,40 -exampleBAM.bam,33,57,1,M,1,0,40 -exampleBAM.bam,21,60,1,M,1,0,40 -exampleBAM.bam,29,47,1,M,1,0,40 -exampleBAM.bam,34,56,1,M,1,0,40 -exampleBAM.bam,31,GA,0,M,2,0,40 -exampleBAM.bam,45,TCGTCCAT,0,D,1,0,40 -exampleBAM.bam,45,TGATTCTA,0,I,1,0,40 -exampleBAM.bam,45,ATCCAGTT,0,D,1,0,40 -exampleBAM.bam,45,32,1,I,5,0,40 -exampleBAM.bam,45,44,1,D,5,0,40 -exampleBAM.bam,45,CATGATTC,0,D,1,0,40 -exampleBAM.bam,45,CAATCCAT,0,D,1,0,40 -exampleBAM.bam,45,CAGTTCTA,0,I,1,0,40 -exampleBAM.bam,34,26,1,M,1,0,40 -exampleBAM.bam,8,AT,0,M,1,1,1 -exampleBAM.bam,45,GGGTTAGG,0,D,2,0,40 -exampleBAM.bam,30,12,1,M,1,0,40 -exampleBAM.bam,45,TATATCAA,0,I,1,0,40 -exampleBAM.bam,45,GCAATCCA,0,D,1,0,40 -exampleBAM.bam,45,GGAGCCTT,0,D,1,0,40 -exampleBAM.bam,45,CAGATCCA,0,D,1,0,40 -exampleBAM.bam,45,2,1,I,5,0,40 -exampleBAM.bam,45,14,1,D,5,0,40 -exampleBAM.bam,45,GAGTGTTG,0,I,1,0,40 -exampleBAM.bam,32,30,1,M,1,0,40 -exampleBAM.bam,27,AC,0,M,1,0,40 -exampleBAM.bam,21,59,1,M,1,0,40 -exampleBAM.bam,45,TGTCTTTA,0,I,1,0,40 -exampleBAM.bam,45,TCAATGTG,0,I,1,0,40 -exampleBAM.bam,45,TGGCTTTA,0,I,1,0,40 -exampleBAM.bam,13,GA,0,M,1,0,40 -exampleBAM.bam,45,CCATGATT,0,D,1,0,40 -exampleBAM.bam,29,CA,0,M,1,0,40 -exampleBAM.bam,19,54,1,M,1,0,40 -exampleBAM.bam,45,TATCAATA,0,I,1,0,40 -exampleBAM.bam,45,TTTGGGCT,0,I,1,0,40 -exampleBAM.bam,45,TTGGTTAA,0,I,1,0,40 -exampleBAM.bam,45,TGCACTTT,0,D,1,0,40 -exampleBAM.bam,45,TCTAGAGT,0,I,1,0,40 -exampleBAM.bam,26,AT,0,M,1,0,40 -exampleBAM.bam,20,57,1,M,1,0,40 -exampleBAM.bam,45,GCCTCGTC,0,D,1,0,40 -exampleBAM.bam,45,70,1,I,5,0,40 -exampleBAM.bam,45,74,1,D,5,0,40 -exampleBAM.bam,18,22,1,M,1,0,40 -exampleBAM.bam,25,32,1,M,1,0,40 -exampleBAM.bam,27,66,1,M,1,0,40 -exampleBAM.bam,31,15,1,M,2,0,40 -exampleBAM.bam,31,GC,0,M,3,0,40 -exampleBAM.bam,45,33,1,I,5,0,40 -exampleBAM.bam,45,45,1,D,5,0,40 -exampleBAM.bam,45,GGAGATTA,0,D,1,0,40 -exampleBAM.bam,45,AGATCCAG,0,D,1,0,40 -exampleBAM.bam,16,19,1,M,1,0,40 -exampleBAM.bam,45,ATGGTATT,0,I,1,0,40 -exampleBAM.bam,45,ATCTCCAG,0,D,1,0,40 -exampleBAM.bam,13,75,1,M,1,0,40 -exampleBAM.bam,45,TTTGTATT,0,I,1,0,40 -exampleBAM.bam,45,TATCATGG,0,I,1,0,40 -exampleBAM.bam,45,TGACATGG,0,I,1,0,40 -exampleBAM.bam,17,TT,0,M,3,1,5 -exampleBAM.bam,31,45,1,M,1,0,40 -exampleBAM.bam,8,AG,0,M,2,0,40 -exampleBAM.bam,34,27,1,M,1,0,40 -exampleBAM.bam,45,3,1,I,5,0,40 -exampleBAM.bam,45,15,1,D,5,0,40 -exampleBAM.bam,45,TTATATCA,0,I,1,0,40 -exampleBAM.bam,45,TGATATAA,0,D,1,0,40 -exampleBAM.bam,45,GGTTATCA,0,I,1,0,40 -exampleBAM.bam,45,TCACTGAT,0,I,1,0,40 -exampleBAM.bam,45,GTGGCCTG,0,D,1,0,40 -exampleBAM.bam,19,21,1,M,2,0,40 -exampleBAM.bam,32,31,1,M,1,0,40 -exampleBAM.bam,27,AA,0,M,1,0,40 -exampleBAM.bam,45,CACTGATG,0,D,1,0,40 -exampleBAM.bam,45,ATAAAGAC,0,I,1,0,40 -exampleBAM.bam,45,GCACTTTC,0,I,1,0,40 -exampleBAM.bam,45,CAGCCTCG,0,I,1,0,40 -exampleBAM.bam,28,CT,0,M,2,0,40 -exampleBAM.bam,45,71,1,I,5,0,40 -exampleBAM.bam,45,75,1,D,5,0,40 -exampleBAM.bam,45,AGCAAAAT,0,I,1,0,40 -exampleBAM.bam,45,TTGCAATC,0,I,1,0,40 -exampleBAM.bam,33,29,1,M,2,0,40 -exampleBAM.bam,26,AG,0,M,1,0,40 -exampleBAM.bam,45,GGTTTGGG,0,D,2,0,40 -exampleBAM.bam,45,GGGTTGGG,0,D,3,0,40 -exampleBAM.bam,24,3,1,M,1,0,40 -exampleBAM.bam,45,TTTTTCTG,0,I,1,0,40 -exampleBAM.bam,45,TTAGATTT,0,D,1,0,40 -exampleBAM.bam,16,TG,0,M,2,0,40 -exampleBAM.bam,45,34,1,I,5,0,40 -exampleBAM.bam,45,46,1,D,5,0,40 -exampleBAM.bam,45,ATGAGTCA,0,D,1,0,40 -exampleBAM.bam,27,65,1,M,1,0,40 -exampleBAM.bam,31,12,1,M,1,0,40 -exampleBAM.bam,31,GG,0,M,4,0,40 -exampleBAM.bam,34,58,1,M,1,0,40 -exampleBAM.bam,24,33,1,M,1,0,40 -exampleBAM.bam,15,8,1,M,1,0,40 -exampleBAM.bam,26,67,1,M,1,0,40 -exampleBAM.bam,30,GA,0,M,2,0,40 -exampleBAM.bam,45,12,1,D,5,0,40 -exampleBAM.bam,45,GGCCTGAA,0,I,1,0,40 -exampleBAM.bam,45,AGATTAGA,0,D,1,0,40 -exampleBAM.bam,45,GCAGCCTC,0,D,1,0,40 -exampleBAM.bam,45,CATGGTGG,0,D,1,0,40 -exampleBAM.bam,45,AATCCATT,0,D,1,0,40 -exampleBAM.bam,45,CTTTATAT,0,D,1,0,40 -exampleBAM.bam,29,76,1,M,1,0,40 -exampleBAM.bam,23,61,1,M,1,0,40 -exampleBAM.bam,28,CA,0,M,2,0,40 -exampleBAM.bam,45,GTTAGGGT,0,I,3,0,40 -exampleBAM.bam,45,ACTCTTTG,0,I,1,0,40 -exampleBAM.bam,45,AGCCTTTG,0,I,1,0,40 -exampleBAM.bam,45,ACATGATC,0,D,1,0,40 -exampleBAM.bam,45,ATTATTGA,0,D,1,0,40 -exampleBAM.bam,32,28,1,M,2,0,40 -exampleBAM.bam,29,42,1,M,1,0,40 -exampleBAM.bam,27,AT,0,M,4,0,40 -exampleBAM.bam,45,TGGGTTAG,0,I,1,0,40 -exampleBAM.bam,45,TGGGTTCG,0,D,1,0,40 -exampleBAM.bam,26,7,1,M,1,0,40 -exampleBAM.bam,45,TTTTCTGT,0,I,1,0,40 -exampleBAM.bam,45,AGGGTTAG,0,I,1,0,40 -exampleBAM.bam,45,AGGGTTCG,0,D,1,0,40 -exampleBAM.bam,45,CGGGTTCG,0,D,1,0,40 -exampleBAM.bam,45,68,1,I,5,0,40 -exampleBAM.bam,45,72,1,D,5,0,40 -exampleBAM.bam,45,AGTCAATG,0,I,1,0,40 -exampleBAM.bam,29,8,1,M,1,0,40 -exampleBAM.bam,29,CG,0,M,2,0,40 -exampleBAM.bam,4,29,1,M,1,0,40 -exampleBAM.bam,16,TT,0,M,4,1,6 -exampleBAM.bam,45,CACCATGA,0,I,1,0,40 -exampleBAM.bam,45,35,1,I,5,0,40 -exampleBAM.bam,45,47,1,D,5,0,40 -exampleBAM.bam,45,CTATTCTT,0,I,1,0,40 -exampleBAM.bam,45,AATCTAAT,0,I,1,0,40 -exampleBAM.bam,45,GTGTTGGT,0,D,1,0,40 -exampleBAM.bam,30,45,1,M,1,0,40 -exampleBAM.bam,45,TCACATGA,0,I,1,0,40 -exampleBAM.bam,9,AG,0,M,1,0,40 -exampleBAM.bam,45,GTCCATGA,0,I,1,0,40 -exampleBAM.bam,31,13,1,M,1,0,40 -exampleBAM.bam,31,GT,0,M,1,0,40 -exampleBAM.bam,34,59,1,M,1,0,40 -exampleBAM.bam,45,AAGACACA,0,I,1,0,40 -exampleBAM.bam,45,CCACCATG,0,D,1,0,40 -exampleBAM.bam,45,1,1,I,5,0,40 -exampleBAM.bam,45,13,1,D,5,0,40 -exampleBAM.bam,16,51,1,M,1,0,40 -exampleBAM.bam,45,CGTCCATG,0,D,1,0,40 -exampleBAM.bam,45,CTGGGGTT,0,I,1,0,40 -exampleBAM.bam,45,GTTGGGTT,0,I,1,0,40 -exampleBAM.bam,45,TTCGGGTT,0,I,3,0,40 -exampleBAM.bam,45,TTAGGGTT,0,I,3,0,40 -exampleBAM.bam,45,TGGGGGTT,0,I,1,0,40 -exampleBAM.bam,45,TTTGGGTT,0,I,1,0,40 -exampleBAM.bam,45,TTGGGGTT,0,I,1,0,40 -exampleBAM.bam,9,38,1,M,1,0,40 -exampleBAM.bam,45,GTTATCAT,0,I,1,0,40 -exampleBAM.bam,30,GC,0,M,1,0,40 -exampleBAM.bam,17,TC,0,M,1,0,40 -exampleBAM.bam,34,25,1,M,1,0,40 -exampleBAM.bam,45,CCATGATA,0,D,1,0,40 -exampleBAM.bam,28,11,1,M,1,0,40 -exampleBAM.bam,45,TATTGATA,0,D,1,0,40 -exampleBAM.bam,29,43,1,M,1,0,40 -exampleBAM.bam,45,CCAGTTCT,0,D,1,0,40 -exampleBAM.bam,45,CAGGTTAT,0,I,1,0,40 -exampleBAM.bam,45,69,1,I,5,0,40 -exampleBAM.bam,45,73,1,D,5,0,40 -exampleBAM.bam,28,41,1,M,1,0,40 -exampleBAM.bam,33,31,1,M,1,0,40 -exampleBAM.bam,45,TGATCGTG,0,D,1,0,40 -exampleBAM.bam,29,9,1,M,1,0,40 -exampleBAM.bam,12,GC,0,M,1,0,40 -exampleBAM.bam,29,6,1,M,1,0,40 -exampleBAM.bam,45,GCCTCGTC,0,I,1,0,40 -exampleBAM.bam,45,70,1,D,5,0,40 -exampleBAM.bam,45,74,1,I,5,0,40 -exampleBAM.bam,45,TTTGGGCT,0,D,1,0,40 -exampleBAM.bam,45,TATCAATA,0,D,1,0,40 -exampleBAM.bam,33,TG,0,M,3,0,40 -exampleBAM.bam,45,TTGGTTAA,0,D,1,0,40 -exampleBAM.bam,45,TCTAGAGT,0,D,1,0,40 -exampleBAM.bam,45,TGCACTTT,0,I,1,0,40 -exampleBAM.bam,4,49,1,M,1,0,40 -exampleBAM.bam,32,18,1,M,1,0,40 -exampleBAM.bam,10,GT,0,M,1,0,40 -exampleBAM.bam,27,11,1,M,1,0,40 -exampleBAM.bam,27,CC,0,M,1,0,40 -exampleBAM.bam,45,CCATGATT,0,I,1,0,40 -exampleBAM.bam,5,TT,0,M,2,1,3 -exampleBAM.bam,18,56,1,M,1,0,40 -exampleBAM.bam,45,TGGCTTTA,0,D,1,0,40 -exampleBAM.bam,45,TGTCTTTA,0,D,1,0,40 -exampleBAM.bam,45,TCAATGTG,0,D,1,0,40 -exampleBAM.bam,12,68,1,M,1,0,40 -exampleBAM.bam,31,32,1,M,1,0,40 -exampleBAM.bam,45,GGAGCCTT,0,I,1,0,40 -exampleBAM.bam,45,CAGATCCA,0,I,1,0,40 -exampleBAM.bam,45,2,1,D,5,0,40 -exampleBAM.bam,45,14,1,I,5,0,40 -exampleBAM.bam,45,GCAATCCA,0,I,1,0,40 -exampleBAM.bam,22,TC,0,M,1,0,40 -exampleBAM.bam,45,GAGTGTTG,0,D,1,0,40 -exampleBAM.bam,15,AA,0,M,2,0,40 -exampleBAM.bam,45,GGGTTAGG,0,I,2,0,40 -exampleBAM.bam,45,TATATCAA,0,D,1,0,40 -exampleBAM.bam,17,62,1,M,1,0,40 -exampleBAM.bam,23,TT,0,M,1,0,40 -exampleBAM.bam,45,CATGATTC,0,I,1,0,40 -exampleBAM.bam,45,32,1,D,5,0,40 -exampleBAM.bam,45,44,1,I,5,0,40 -exampleBAM.bam,45,ATCCAGTT,0,I,1,0,40 -exampleBAM.bam,45,CAGTTCTA,0,D,1,0,40 -exampleBAM.bam,45,CAATCCAT,0,I,1,0,40 -exampleBAM.bam,45,TGATTCTA,0,D,1,0,40 -exampleBAM.bam,45,TCGTCCAT,0,I,1,0,40 -exampleBAM.bam,24,GT,0,M,2,0,40 -exampleBAM.bam,24,13,1,M,3,0,40 -exampleBAM.bam,30,34,1,M,1,0,40 -exampleBAM.bam,29,AC,0,M,1,0,40 -exampleBAM.bam,29,7,1,M,1,0,40 -exampleBAM.bam,32,49,1,M,1,0,40 -exampleBAM.bam,25,74,1,M,1,0,40 -exampleBAM.bam,27,40,1,M,1,0,40 -exampleBAM.bam,28,39,1,M,1,0,40 -exampleBAM.bam,45,TTGCAATC,0,D,1,0,40 -exampleBAM.bam,33,TT,0,M,4,0,40 -exampleBAM.bam,30,69,1,M,1,0,40 -exampleBAM.bam,45,71,1,D,5,0,40 -exampleBAM.bam,45,75,1,I,5,0,40 -exampleBAM.bam,45,AGCAAAAT,0,D,1,0,40 -exampleBAM.bam,32,19,1,M,1,0,40 -exampleBAM.bam,32,TC,0,M,3,0,40 -exampleBAM.bam,29,37,1,M,1,0,40 -exampleBAM.bam,27,CA,0,M,2,0,40 -exampleBAM.bam,45,ATAAAGAC,0,D,1,0,40 -exampleBAM.bam,45,CACTGATG,0,I,1,0,40 -exampleBAM.bam,45,CAGCCTCG,0,D,1,0,40 -exampleBAM.bam,45,GCACTTTC,0,D,1,0,40 -exampleBAM.bam,25,14,1,M,1,0,40 -exampleBAM.bam,34,23,1,M,1,0,40 -exampleBAM.bam,6,52,1,M,1,1,1 -exampleBAM.bam,45,TGATATAA,0,I,1,0,40 -exampleBAM.bam,45,GGTTATCA,0,D,1,0,40 -exampleBAM.bam,45,TTATATCA,0,D,1,0,40 -exampleBAM.bam,45,TCACTGAT,0,D,1,0,40 -exampleBAM.bam,45,GTGGCCTG,0,I,1,0,40 -exampleBAM.bam,45,3,1,D,5,0,40 -exampleBAM.bam,45,15,1,I,5,0,40 -exampleBAM.bam,17,63,1,M,1,0,40 -exampleBAM.bam,23,TG,0,M,1,0,40 -exampleBAM.bam,45,TTTGTATT,0,D,1,0,40 -exampleBAM.bam,24,GG,0,M,2,0,40 -exampleBAM.bam,30,35,1,M,2,0,40 -exampleBAM.bam,45,TATCATGG,0,D,1,0,40 -exampleBAM.bam,45,TGACATGG,0,D,1,0,40 -exampleBAM.bam,45,AGATCCAG,0,I,1,0,40 -exampleBAM.bam,45,33,1,D,5,0,40 -exampleBAM.bam,45,45,1,I,5,0,40 -exampleBAM.bam,45,GGAGATTA,0,I,1,0,40 -exampleBAM.bam,45,ATGGTATT,0,D,1,0,40 -exampleBAM.bam,45,ATCTCCAG,0,I,1,0,40 -exampleBAM.bam,45,CGGGTTCG,0,I,1,0,40 -exampleBAM.bam,45,AGGGTTAG,0,D,1,0,40 -exampleBAM.bam,45,AGGGTTCG,0,I,1,0,40 -exampleBAM.bam,45,68,1,D,5,0,40 -exampleBAM.bam,45,72,1,I,5,0,40 -exampleBAM.bam,45,AGTCAATG,0,D,1,0,40 -exampleBAM.bam,33,18,1,M,1,0,40 -exampleBAM.bam,33,TA,0,M,1,0,40 -exampleBAM.bam,45,TGGGTTAG,0,D,1,0,40 -exampleBAM.bam,45,TGGGTTCG,0,I,1,0,40 -exampleBAM.bam,45,TTTTCTGT,0,D,1,0,40 -exampleBAM.bam,4,TT,0,M,1,1,1 -exampleBAM.bam,29,4,1,M,1,0,40 -exampleBAM.bam,25,73,1,M,1,0,40 -exampleBAM.bam,45,AGCCTTTG,0,D,1,0,40 -exampleBAM.bam,45,ACTCTTTG,0,D,1,0,40 -exampleBAM.bam,18,58,1,M,1,1,1 -exampleBAM.bam,45,ATTATTGA,0,I,1,0,40 -exampleBAM.bam,45,ACATGATC,0,I,1,0,40 -exampleBAM.bam,28,AA,0,M,1,0,40 -exampleBAM.bam,33,48,1,M,1,0,40 -exampleBAM.bam,45,GTTAGGGT,0,D,3,0,40 -exampleBAM.bam,32,16,1,M,2,0,40 -exampleBAM.bam,32,TG,0,M,2,0,40 -exampleBAM.bam,45,GGCCTGAA,0,D,1,0,40 -exampleBAM.bam,45,12,1,I,5,0,40 -exampleBAM.bam,45,AGATTAGA,0,I,1,0,40 -exampleBAM.bam,45,GCAGCCTC,0,I,1,0,40 -exampleBAM.bam,45,AATCCATT,0,I,1,0,40 -exampleBAM.bam,45,CTTTATAT,0,I,1,0,40 -exampleBAM.bam,45,CATGGTGG,0,I,1,0,40 -exampleBAM.bam,22,TT,0,M,1,0,40 -exampleBAM.bam,24,45,1,M,1,0,40 -exampleBAM.bam,25,GT,0,M,3,0,40 -exampleBAM.bam,31,34,1,M,1,0,40 -exampleBAM.bam,34,20,1,M,1,0,40 -exampleBAM.bam,45,34,1,D,5,0,40 -exampleBAM.bam,45,46,1,I,5,0,40 -exampleBAM.bam,45,ATGAGTCA,0,I,1,0,40 -exampleBAM.bam,22,51,1,M,1,0,40 -exampleBAM.bam,45,TTTTTCTG,0,D,1,0,40 -exampleBAM.bam,45,GGGTTGGG,0,I,3,0,40 -exampleBAM.bam,45,GGTTTGGG,0,I,2,0,40 -exampleBAM.bam,45,TTAGATTT,0,I,1,0,40 -exampleBAM.bam,30,32,1,M,1,0,40 -exampleBAM.bam,23,19,1,M,1,0,40 -exampleBAM.bam,23,TC,0,M,1,0,40 -exampleBAM.bam,25,47,1,M,1,0,40 -exampleBAM.bam,10,75,1,M,1,0,40 -exampleBAM.bam,11,GG,0,M,1,0,40 -exampleBAM.bam,33,TC,0,M,6,0,40 -exampleBAM.bam,45,TGATCGTG,0,I,1,0,40 -exampleBAM.bam,45,CAGGTTAT,0,D,1,0,40 -exampleBAM.bam,45,CCAGTTCT,0,I,1,0,40 -exampleBAM.bam,45,69,1,D,5,0,40 -exampleBAM.bam,45,73,1,I,5,0,40 -exampleBAM.bam,32,51,1,M,1,0,40 -exampleBAM.bam,29,AT,0,M,2,0,40 -exampleBAM.bam,29,5,1,M,1,0,40 -exampleBAM.bam,33,49,1,M,1,0,40 -exampleBAM.bam,45,TATTGATA,0,I,1,0,40 -exampleBAM.bam,45,CCATGATA,0,I,1,0,40 -exampleBAM.bam,32,TT,0,M,2,0,40 -exampleBAM.bam,45,TGGGGGTT,0,D,1,0,40 -exampleBAM.bam,45,TTAGGGTT,0,D,3,0,40 -exampleBAM.bam,45,TTCGGGTT,0,D,3,0,40 -exampleBAM.bam,45,TTGGGGTT,0,D,1,0,40 -exampleBAM.bam,45,TTTGGGTT,0,D,1,0,40 -exampleBAM.bam,45,GTTGGGTT,0,D,1,0,40 -exampleBAM.bam,45,GTTATCAT,0,D,1,0,40 -exampleBAM.bam,45,CGTCCATG,0,I,1,0,40 -exampleBAM.bam,45,CCACCATG,0,I,1,0,40 -exampleBAM.bam,45,AAGACACA,0,D,1,0,40 -exampleBAM.bam,45,1,1,D,5,0,40 -exampleBAM.bam,45,13,1,I,5,0,40 -exampleBAM.bam,45,CTGGGGTT,0,D,1,0,40 -exampleBAM.bam,22,TG,0,M,3,0,40 -exampleBAM.bam,25,GG,0,M,2,0,40 -exampleBAM.bam,8,CA,0,M,1,0,40 -exampleBAM.bam,34,21,1,M,1,0,40 -exampleBAM.bam,24,GA,0,M,1,0,40 -exampleBAM.bam,45,GTGTTGGT,0,I,1,0,40 -exampleBAM.bam,45,TCACATGA,0,D,1,0,40 -exampleBAM.bam,45,GTCCATGA,0,D,1,0,40 -exampleBAM.bam,45,CACCATGA,0,D,1,0,40 -exampleBAM.bam,45,35,1,D,5,0,40 -exampleBAM.bam,45,47,1,I,5,0,40 -exampleBAM.bam,45,CTATTCTT,0,D,1,0,40 -exampleBAM.bam,45,AATCTAAT,0,D,1,0,40 -exampleBAM.bam,25,46,1,M,1,0,40 -exampleBAM.bam,27,76,1,M,1,0,40 -exampleBAM.bam,34,55,1,M,1,0,40 -exampleBAM.bam,31,1,1,M,1,0,40 -exampleBAM.bam,23,18,1,M,1,0,40 -exampleBAM.bam,31,66,1,M,1,0,40 -exampleBAM.bam,45,GAGATTAG,0,D,1,0,40 -exampleBAM.bam,45,TTCAGGCC,0,D,1,0,40 -exampleBAM.bam,13,AA,0,M,1,0,40 -exampleBAM.bam,45,GGTTAATG,0,D,1,0,40 -exampleBAM.bam,45,GGTGGAGC,0,D,1,0,40 -exampleBAM.bam,21,TT,0,M,1,0,40 -exampleBAM.bam,21,17,1,M,1,0,40 -exampleBAM.bam,12,AG,0,M,1,0,40 -exampleBAM.bam,45,GGCCACCA,0,I,1,0,40 -exampleBAM.bam,45,GCTGGGGT,0,D,1,0,40 -exampleBAM.bam,45,CTTGGCTT,0,I,1,0,40 -exampleBAM.bam,45,66,1,D,5,0,40 -exampleBAM.bam,26,GT,0,M,1,0,40 -exampleBAM.bam,45,TAATCTCC,0,D,1,0,40 -exampleBAM.bam,45,GTTGGGGT,0,D,1,0,40 -exampleBAM.bam,28,34,1,M,1,0,40 -exampleBAM.bam,45,TTGGGGGT,0,D,1,0,40 -exampleBAM.bam,17,58,1,M,1,0,40 -exampleBAM.bam,31,6,1,M,1,0,40 -exampleBAM.bam,45,CCTTTGCA,0,I,1,0,40 -exampleBAM.bam,45,36,1,D,5,0,40 -exampleBAM.bam,45,40,1,I,5,0,40 -exampleBAM.bam,45,CAGGCACC,0,D,1,0,40 -exampleBAM.bam,45,GTTCTAGA,0,I,1,0,40 -exampleBAM.bam,45,TATTTGCA,0,I,1,0,40 -exampleBAM.bam,34,TA,0,M,1,0,40 -exampleBAM.bam,25,CC,0,M,1,0,40 -exampleBAM.bam,22,23,1,M,1,0,40 -exampleBAM.bam,45,GAACTGGG,0,I,1,0,40 -exampleBAM.bam,45,6,1,D,5,0,40 -exampleBAM.bam,45,10,1,I,5,0,40 -exampleBAM.bam,45,GGGCTGGG,0,I,1,0,40 -exampleBAM.bam,45,TTGATATA,0,D,1,0,40 -exampleBAM.bam,45,TTCTTAAG,0,D,1,0,40 -exampleBAM.bam,27,GA,0,M,2,0,40 -exampleBAM.bam,27,14,1,M,1,0,40 -exampleBAM.bam,32,23,1,M,1,0,40 -exampleBAM.bam,21,50,1,M,1,0,40 -exampleBAM.bam,45,TAACCTGG,0,D,1,0,40 -exampleBAM.bam,45,TCTATTCT,0,I,1,0,40 -exampleBAM.bam,11,40,1,M,1,1,1 -exampleBAM.bam,45,TTTATTAT,0,D,1,0,40 -exampleBAM.bam,45,ATGATTCT,0,I,1,0,40 -exampleBAM.bam,45,CCTGGAGA,0,D,1,0,40 -exampleBAM.bam,45,GCCAGGCA,0,D,1,0,40 -exampleBAM.bam,12,AT,0,M,1,0,40 -exampleBAM.bam,32,53,1,M,1,0,40 -exampleBAM.bam,21,TG,0,M,3,0,40 -exampleBAM.bam,26,GG,0,M,1,0,40 -exampleBAM.bam,45,TCTGTGTC,0,D,1,0,40 -exampleBAM.bam,45,GTTGGGGG,0,D,1,0,40 -exampleBAM.bam,45,TTGGGCTG,0,I,1,0,40 -exampleBAM.bam,45,AAATCTAA,0,I,1,0,40 -exampleBAM.bam,45,67,1,D,5,0,40 -exampleBAM.bam,45,CTGGAGAT,0,D,1,0,40 -exampleBAM.bam,45,AGATTTTT,0,D,1,0,40 -exampleBAM.bam,45,AGGCACCC,0,I,1,0,40 -exampleBAM.bam,45,CTGAAAGT,0,I,1,0,40 -exampleBAM.bam,8,46,1,M,1,0,40 -exampleBAM.bam,45,TCCAGGTT,0,D,1,0,40 -exampleBAM.bam,45,GTGAGTGT,0,I,1,0,40 -exampleBAM.bam,24,CG,0,M,1,0,40 -exampleBAM.bam,45,TTATCATG,0,I,1,0,40 -exampleBAM.bam,45,ACAGCAAA,0,I,1,0,40 -exampleBAM.bam,45,37,1,D,5,0,40 -exampleBAM.bam,45,41,1,I,5,0,40 -exampleBAM.bam,45,AGTGCAAA,0,I,1,0,40 -exampleBAM.bam,34,TC,0,M,3,0,40 -exampleBAM.bam,25,CA,0,M,1,0,40 -exampleBAM.bam,30,AT,0,M,1,0,40 -exampleBAM.bam,45,TTTATATC,0,D,1,0,40 -exampleBAM.bam,45,TTACTCTT,0,D,1,0,40 -exampleBAM.bam,45,GTATTACT,0,I,1,0,40 -exampleBAM.bam,45,TGGTTAAT,0,D,1,0,40 -exampleBAM.bam,45,7,1,D,5,0,40 -exampleBAM.bam,45,11,1,I,5,0,40 -exampleBAM.bam,45,CCTGAAAG,0,D,1,0,40 -exampleBAM.bam,45,CTTTGCAC,0,I,1,0,40 -exampleBAM.bam,45,GTGAACTG,0,D,1,0,40 -exampleBAM.bam,45,TTGGCTTT,0,I,1,0,40 -exampleBAM.bam,28,2,1,M,1,0,40 -exampleBAM.bam,19,30,1,M,1,0,40 -exampleBAM.bam,27,GT,0,M,1,0,40 -exampleBAM.bam,45,64,1,D,5,0,40 -exampleBAM.bam,45,76,1,I,5,0,40 -exampleBAM.bam,45,AGTGTTGG,0,I,1,0,40 -exampleBAM.bam,45,AGGGTTGG,0,I,1,0,40 -exampleBAM.bam,45,GATTCTAT,0,I,1,0,40 -exampleBAM.bam,45,AGACACAG,0,D,1,0,40 -exampleBAM.bam,45,GGGGTTGG,0,I,2,0,40 -exampleBAM.bam,15,68,1,M,1,0,40 -exampleBAM.bam,45,TATAAAGA,0,I,1,0,40 -exampleBAM.bam,33,22,1,M,2,0,40 -exampleBAM.bam,12,AA,0,M,1,0,40 -exampleBAM.bam,32,54,1,M,1,0,40 -exampleBAM.bam,45,CTCGTCCA,0,I,1,0,40 -exampleBAM.bam,45,38,1,D,5,0,40 -exampleBAM.bam,45,42,1,I,5,0,40 -exampleBAM.bam,45,TTAAGTGA,0,I,1,0,40 -exampleBAM.bam,45,TTTGCAAT,0,I,1,0,40 -exampleBAM.bam,45,TTTGCACT,0,D,1,0,40 -exampleBAM.bam,24,CC,0,M,2,0,40 -exampleBAM.bam,45,TGAGTCAA,0,D,1,0,40 -exampleBAM.bam,6,TT,0,M,2,1,3 -exampleBAM.bam,31,4,1,M,1,0,40 -exampleBAM.bam,31,AG,0,M,2,0,40 -exampleBAM.bam,34,50,1,M,1,0,40 -exampleBAM.bam,27,73,1,M,1,0,40 -exampleBAM.bam,45,GACACAGC,0,D,1,0,40 -exampleBAM.bam,45,AACCTGGA,0,I,1,0,40 -exampleBAM.bam,45,4,1,D,5,0,40 -exampleBAM.bam,45,8,1,I,5,0,40 -exampleBAM.bam,16,58,1,M,1,0,40 -exampleBAM.bam,30,AA,0,M,2,0,40 -exampleBAM.bam,24,41,1,M,1,0,40 -exampleBAM.bam,34,TG,0,M,3,0,40 -exampleBAM.bam,29,68,1,M,1,0,40 -exampleBAM.bam,25,9,1,M,1,0,40 -exampleBAM.bam,26,44,1,M,1,0,40 -exampleBAM.bam,45,GGTATTAC,0,D,1,0,40 -exampleBAM.bam,45,TGTGAACT,0,I,1,0,40 -exampleBAM.bam,45,TGGCCTGA,0,D,1,0,40 -exampleBAM.bam,5,22,1,M,1,0,40 -exampleBAM.bam,45,AAGTGCAA,0,I,1,0,40 -exampleBAM.bam,45,ATTTGCAA,0,I,1,0,40 -exampleBAM.bam,45,ATCTAATC,0,D,1,0,40 -exampleBAM.bam,27,GG,0,M,1,0,40 -exampleBAM.bam,21,48,1,M,1,0,40 -exampleBAM.bam,45,TGAGTGTT,0,D,1,0,40 -exampleBAM.bam,13,39,1,M,1,0,40 -exampleBAM.bam,45,TAAAGACA,0,D,1,0,40 -exampleBAM.bam,33,23,1,M,1,0,40 -exampleBAM.bam,45,GTGGAGCC,0,I,1,0,40 -exampleBAM.bam,45,TTTCACAT,0,D,1,0,40 -exampleBAM.bam,45,65,1,D,5,0,40 -exampleBAM.bam,45,GATTTTTC,0,D,1,0,40 -exampleBAM.bam,45,AGTTCTAG,0,I,1,0,40 -exampleBAM.bam,19,61,1,M,1,0,40 -exampleBAM.bam,28,71,1,M,1,0,40 -exampleBAM.bam,15,35,1,M,1,0,40 -exampleBAM.bam,24,CA,0,M,1,0,40 -exampleBAM.bam,24,10,1,M,1,1,1 -exampleBAM.bam,45,TTATTGAT,0,D,1,0,40 -exampleBAM.bam,45,ATAACCTG,0,I,1,0,40 -exampleBAM.bam,45,GAAAGTGC,0,I,1,0,40 -exampleBAM.bam,45,39,1,D,5,0,40 -exampleBAM.bam,45,43,1,I,5,0,40 -exampleBAM.bam,31,AT,0,M,2,0,40 -exampleBAM.bam,31,5,1,M,1,0,40 -exampleBAM.bam,34,51,1,M,1,0,40 -exampleBAM.bam,27,72,1,M,1,0,40 -exampleBAM.bam,30,AC,0,M,1,0,40 -exampleBAM.bam,45,CATGGTAT,0,D,1,0,40 -exampleBAM.bam,45,ATGATCGT,0,I,1,0,40 -exampleBAM.bam,45,5,1,D,5,0,40 -exampleBAM.bam,45,9,1,I,5,0,40 -exampleBAM.bam,45,GCACCCAG,0,I,1,0,40 -exampleBAM.bam,34,TT,0,M,6,0,40 -exampleBAM.bam,31,39,1,M,2,0,40 -exampleBAM.bam,14,33,1,M,1,0,40 -EOF