Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
3561056a9c
|
|
@ -224,22 +224,23 @@ public class RecalDataManager {
|
|||
public static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
|
||||
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
|
||||
int tableIndex = 0;
|
||||
|
||||
final Pair<String, String> covariateValue = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
|
||||
final Pair<String, String> covariateName = new Pair<String, String>(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s");
|
||||
final Pair<String, String> eventType = new Pair<String, String>(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s");
|
||||
final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
|
||||
final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
|
||||
final Pair<String, String> nObservations = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
|
||||
final Pair<String, String> nErrors = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
BQSRKeyManager keyManager = entry.getKey();
|
||||
Map<BitSet, RecalDatum> recalTable = entry.getValue();
|
||||
|
||||
boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs.
|
||||
GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, "");
|
||||
final Pair<String, String> covariateValue = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
|
||||
final Pair<String, String> covariateName = new Pair<String, String>(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s");
|
||||
final Pair<String, String> eventType = new Pair<String, String>(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s");
|
||||
final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.2f");
|
||||
final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.2f");
|
||||
final Pair<String, String> nObservations = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
|
||||
final Pair<String, String> nErrors = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
|
||||
|
||||
long primaryKey = 0L;
|
||||
|
||||
List<Covariate> requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
|
||||
List<Covariate> requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
|
||||
List<Covariate> optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
|
||||
|
||||
ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
|
|
@ -256,15 +257,17 @@ public class RecalDataManager {
|
|||
|
||||
columnNames.add(eventType); // the order of these column names is important here
|
||||
columnNames.add(empiricalQuality);
|
||||
columnNames.add(estimatedQReported);
|
||||
if (isReadGroupTable)
|
||||
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
|
||||
columnNames.add(nObservations);
|
||||
columnNames.add(nErrors);
|
||||
|
||||
|
||||
reportTable.addPrimaryKey("PrimaryKey", false); // every table must have a primary key (hidden)
|
||||
for (Pair<String, String> columnName : columnNames)
|
||||
reportTable.addColumn(columnName.getFirst(), true, columnName.getSecond()); // every table must have the event type
|
||||
|
||||
long primaryKey = 0L;
|
||||
|
||||
for (Map.Entry<BitSet, RecalDatum> recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys
|
||||
BitSet bitSetKey = recalTableEntry.getKey();
|
||||
Map<String, Object> columnData = new HashMap<String, Object>(columnNames.size());
|
||||
|
|
@ -274,8 +277,9 @@ public class RecalDataManager {
|
|||
columnData.put(columnName, key);
|
||||
}
|
||||
RecalDatum datum = recalTableEntry.getValue();
|
||||
columnData.put(iterator.next().getFirst(), datum.getEmpiricalQuality()); // iterator.next() gives the column name for Empirical Quality
|
||||
columnData.put(iterator.next().getFirst(), Math.round(datum.getEstimatedQReported())); // iterator.next() gives the column name for EstimatedQReported
|
||||
columnData.put(iterator.next().getFirst(), datum.getEmpiricalQuality());
|
||||
if (isReadGroupTable)
|
||||
columnData.put(iterator.next().getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||
columnData.put(iterator.next().getFirst(), datum.numObservations);
|
||||
columnData.put(iterator.next().getFirst(), datum.numMismatches);
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
@ -76,7 +78,7 @@ public class RecalDatum extends RecalDatumOptimized {
|
|||
public final void combine(final RecalDatum other) {
|
||||
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
||||
this.increment(other.numObservations, other.numMismatches);
|
||||
this.estimatedQReported = -10 * Math.log10(sumErrors / (double) this.numObservations);
|
||||
this.estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -90,7 +92,7 @@ public class RecalDatum extends RecalDatumOptimized {
|
|||
}
|
||||
|
||||
public final void calcEstimatedReportedQuality() {
|
||||
this.estimatedQReported = -10 * Math.log10(calcExpectedErrors() / (double) numObservations);
|
||||
this.estimatedQReported = -10 * Math.log10(calcExpectedErrors() / numObservations);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -107,15 +109,16 @@ public class RecalDatum extends RecalDatumOptimized {
|
|||
return empiricalQuality;
|
||||
}
|
||||
|
||||
public final void resetCalculatedQualities() {
|
||||
empiricalQuality = 0.0;
|
||||
}
|
||||
|
||||
private double calcExpectedErrors() {
|
||||
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
|
||||
return (double) this.numObservations * QualityUtils.qualToProb(estimatedQReported);
|
||||
}
|
||||
|
||||
private double qualToErrorProb(final double qual) {
|
||||
return Math.pow(10.0, qual / -10.0);
|
||||
/**
|
||||
* Makes a hard copy of the recal datum element
|
||||
*
|
||||
* @return a new recal datum object with the same contents of this datum.
|
||||
*/
|
||||
protected RecalDatum copy() {
|
||||
return new RecalDatum(numObservations, numMismatches, estimatedQReported, empiricalQuality);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -88,23 +88,31 @@ public class RecalibrationReport {
|
|||
* @param other the recalibration report to combine with this one
|
||||
*/
|
||||
public void combine(RecalibrationReport other) {
|
||||
Iterator<Map<BitSet, RecalDatum>> tableIterator = keysAndTablesMap.values().iterator(); // because these are ordered (linked hashmaps) we can iterate over the 'this' and do a for loop on the 'other' tables and be sure that we are looking at the equivalent tables on both objects
|
||||
for (Map<BitSet, RecalDatum> otherTable : other.getKeysAndTablesMap().values()) { // iterate over all tables for 'other'
|
||||
Map<BitSet, RecalDatum> thisTable = tableIterator.next(); // iterate over all tables for 'this'
|
||||
for (Map.Entry<BitSet, RecalDatum> entry : otherTable.entrySet()) { // for each table, go through all the entries in the 'other' dataset to update 'this' dataset
|
||||
BitSet key = entry.getKey();
|
||||
RecalDatum otherDatum = entry.getValue();
|
||||
RecalDatum thisDatum = thisTable.get(key);
|
||||
Iterator<Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>>> thisIterator = keysAndTablesMap.entrySet().iterator();
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> otherEntry : other.getKeysAndTablesMap().entrySet()) {
|
||||
Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> thisEntry = thisIterator.next();
|
||||
|
||||
Map<BitSet, RecalDatum> thisTable = thisEntry.getValue();
|
||||
BQSRKeyManager thisKeyManager = thisEntry.getKey();
|
||||
BQSRKeyManager otherKeyManager = otherEntry.getKey();
|
||||
|
||||
for (Map.Entry<BitSet, RecalDatum> otherTableEntry : otherEntry.getValue().entrySet()) {
|
||||
RecalDatum otherDatum = otherTableEntry.getValue();
|
||||
BitSet otherBitKey = otherTableEntry.getKey();
|
||||
List<Object> otherObjectKey = otherKeyManager.keySetFrom(otherBitKey);
|
||||
|
||||
BitSet thisBitKey = thisKeyManager.bitSetFromKey(otherObjectKey.toArray());
|
||||
RecalDatum thisDatum = thisTable.get(thisBitKey);
|
||||
|
||||
if (thisDatum == null)
|
||||
thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it!
|
||||
thisTable.put(thisBitKey, otherDatum);
|
||||
else
|
||||
thisDatum.combine(otherDatum); // add the two datum objects into 'this'
|
||||
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
|
||||
thisDatum.combine(otherDatum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public QuantizationInfo getQuantizationInfo() {
|
||||
return quantizationInfo;
|
||||
}
|
||||
|
|
@ -279,13 +287,11 @@ public class RecalibrationReport {
|
|||
* and quantization of the quality scores during every call of combine(). Very useful for the BQSRGatherer.
|
||||
*/
|
||||
public void calculateEmpiricalAndQuantizedQualities() {
|
||||
quantizationInfo.quantizeQualityScores(RAC.QUANTIZING_LEVELS);
|
||||
for (Map<BitSet, RecalDatum> table : keysAndTablesMap.values()) {
|
||||
for (RecalDatum datum : table.values()) {
|
||||
for (Map<BitSet, RecalDatum> table : keysAndTablesMap.values())
|
||||
for (RecalDatum datum : table.values())
|
||||
datum.calcCombinedEmpiricalQuality(QualityUtils.MAX_QUAL_SCORE);
|
||||
datum.calcEstimatedReportedQuality();
|
||||
}
|
||||
}
|
||||
|
||||
quantizationInfo = new QuantizationInfo(keysAndTablesMap, RAC.QUANTIZING_LEVELS);
|
||||
}
|
||||
|
||||
public void output(PrintStream output) {
|
||||
|
|
|
|||
|
|
@ -45,6 +45,10 @@ import java.util.*;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class ReadUtils {
|
||||
|
||||
private static final String OFFSET_OUT_OF_BOUNDS_EXCEPTION = "Offset cannot be greater than read length %d : %d";
|
||||
private static final String OFFSET_NOT_ZERO_EXCEPTION = "We ran past the end of the read and never found the offset, something went wrong!";
|
||||
|
||||
private ReadUtils() {
|
||||
}
|
||||
|
||||
|
|
@ -748,4 +752,31 @@ public class ReadUtils {
|
|||
return Arrays.deepToString(sequenceRecordNames);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the reference coordinate for a read coordinate
|
||||
*
|
||||
* @param read the read
|
||||
* @param offset the base in the read (coordinate in the read)
|
||||
* @return the reference coordinate correspondent to this base
|
||||
*/
|
||||
public static long getReferenceCoordinateForReadCoordinate(GATKSAMRecord read, int offset) {
|
||||
if (offset > read.getReadLength())
|
||||
throw new ReviewedStingException(String.format(OFFSET_OUT_OF_BOUNDS_EXCEPTION, offset, read.getReadLength()));
|
||||
|
||||
long location = read.getAlignmentStart();
|
||||
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
|
||||
while (offset > 0 && cigarElementIterator.hasNext()) {
|
||||
CigarElement cigarElement = cigarElementIterator.next();
|
||||
long move = 0;
|
||||
if (cigarElement.getOperator().consumesReferenceBases())
|
||||
move = (long) Math.min(cigarElement.getLength(), offset);
|
||||
location += move;
|
||||
offset -= move;
|
||||
}
|
||||
if (offset > 0 && !cigarElementIterator.hasNext())
|
||||
throw new ReviewedStingException(OFFSET_NOT_ZERO_EXCEPTION);
|
||||
|
||||
return location;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,8 @@ public class BQSRGathererUnitTest {
|
|||
for (GATKReportTable originalTable : originalReport.getTables()) {
|
||||
GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName());
|
||||
List<String> columnsToTest = new LinkedList<String>();
|
||||
columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||
if (originalTable.getTableName().equals(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL
|
||||
columnsToTest.add(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 1);
|
||||
|
|
@ -44,8 +46,6 @@ public class BQSRGathererUnitTest {
|
|||
}
|
||||
|
||||
else if (originalTable.getTableName().startsWith("RecalTable")) {
|
||||
columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue