Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2012-03-29 12:33:13 -04:00
commit e61e162c81
7 changed files with 54 additions and 60 deletions

View File

@ -60,24 +60,12 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
if ( format.equals("") ) {
this.format = "%s";
this.dataType = GATKReportDataType.Unknown;
if ( defaultValue != null ) {
this.defaultValue = defaultValue;
//this.dataType = GATKReportDataType.fromObject(defaultValue);
}
else {
this.defaultValue = "";
//this.dataType = GATKReportDataType.Unknown;
}
this.defaultValue = (defaultValue != null) ? defaultValue : "";
}
else {
this.format = format;
this.dataType = GATKReportDataType.fromFormatString(format);
if ( defaultValue == null ) {
this.defaultValue = dataType.getDefaultValue();
}
else {
this.defaultValue = defaultValue;
}
this.defaultValue = (defaultValue != null) ? defaultValue : dataType.getDefaultValue();
}
}
@ -225,8 +213,10 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
public Object put(Object key, Object value) {
if (value != null) {
String formatted = formatValue(value);
updateMaxWidth(formatted);
updateFormat(formatted);
if (!formatted.equals("")) {
updateMaxWidth(formatted);
updateFormat(formatted);
}
}
return super.put(key, value);
}
@ -236,7 +226,7 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
}
private void updateFormat(String formatted) {
if (!isRightAlign(formatted))
alignment = GATKReportColumnFormat.Alignment.LEFT;
if (alignment == GATKReportColumnFormat.Alignment.RIGHT)
alignment = isRightAlign(formatted) ? GATKReportColumnFormat.Alignment.RIGHT : GATKReportColumnFormat.Alignment.LEFT;
}
}

View File

@ -313,7 +313,7 @@ public class RecalDataManager {
* @param read The read to adjust
* @param RAC The list of shared command line arguments
*/
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
public static void parsePlatformForRead(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
GATKSAMReadGroupRecord readGroup = read.getReadGroup();
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
@ -337,47 +337,47 @@ public class RecalDataManager {
}
/**
* Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space
* Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are
* inconsistent with the color space. If there is no call in the color space, this method returns true meaning
* this read should be skipped
*
* @param read The SAMRecord to parse
* @param strategy the strategy used for SOLID no calls
* @param read The SAMRecord to parse
* @return whether or not this read should be skipped
*/
public static void parseColorSpace(final GATKSAMRecord read) {
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
if (ReadUtils.isSOLiDRead(read)) {
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
public static boolean checkColorSpace(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) {
byte[] colorSpace;
if (attr instanceof String) {
if (attr instanceof String)
colorSpace = ((String) attr).getBytes();
}
else {
else
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
// Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
byte[] readBases = read.getReadBases();
if (read.getReadNegativeStrandFlag()) {
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
if (read.getReadNegativeStrandFlag())
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
}
final byte[] inconsistency = new byte[readBases.length];
int iii;
byte prevBase = colorSpace[0]; // The sentinel
for (iii = 0; iii < readBases.length; iii++) {
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
inconsistency[iii] = (byte) (thisBase == readBases[iii] ? 0 : 1);
prevBase = readBases[iii];
int i;
byte prevBase = colorSpace[0]; // The sentinel
for (i = 0; i < readBases.length; i++) {
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]);
inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
prevBase = readBases[i];
}
read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
}
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
else {
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
else
return false; // otherwise, just skip the read
}
}
return true;
}
/**

View File

@ -109,7 +109,6 @@ public class RecalDatum extends RecalDatumOptimized {
public final void resetCalculatedQualities() {
empiricalQuality = 0.0;
estimatedQReported = 0.0;
}
private double calcExpectedErrors() {

View File

@ -24,7 +24,7 @@ public class RecalibrationReport {
GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
public RecalibrationReport(final File RECAL_FILE) {
GATKReport report = new GATKReport(RECAL_FILE);
@ -77,10 +77,11 @@ public class RecalibrationReport {
/**
* Combines two recalibration reports by adding all observations and errors
*
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate them
* after combining. The reason for not calculating it is because this function is inteded for combining a series of
* recalibration reports, and it only makes sense to calculate the empirical qualities and quantized qualities after all
* the recalibration reports have been combined. Having the user recalculate when appropriate, makes this method faster
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate
* them after combining. The reason for not calculating it is because this function is inteded for combining a
* series of recalibration reports, and it only makes sense to calculate the empirical qualities and quantized
* qualities after all the recalibration reports have been combined. Having the user recalculate when appropriate,
* makes this method faster
*
* Note2: The empirical quality reported, however, is recalculated given its simplicity.
*
@ -97,7 +98,7 @@ public class RecalibrationReport {
if (thisDatum == null)
thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it!
else
thisDatum.increment(otherDatum); // add the two datum objects into 'this'
thisDatum.combine(otherDatum); // add the two datum objects into 'this'
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
}
}

View File

@ -222,7 +222,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, 2*samples.size());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, UnifiedGenotyperEngine.DEFAULT_PLOIDY);
// initialize the header
Set<VCFHeaderLine> headerInfo = getHeaderInfo();

View File

@ -51,6 +51,8 @@ import java.util.*;
public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
public static final int DEFAULT_PLOIDY = 2;
public enum OUTPUT_MODE {
/** produces calls only at variant sites */
@ -95,7 +97,8 @@ public class UnifiedGenotyperEngine {
private final Logger logger;
private final PrintStream verboseWriter;
// number of chromosomes (2 * samples) in input
// number of chromosomes (ploidy * samples) in input
private final int ploidy;
private final int N;
// the standard filter to use for calls below the confidence threshold but above the emit threshold
@ -112,11 +115,11 @@ public class UnifiedGenotyperEngine {
// ---------------------------------------------------------------------------------------------------------
@Requires({"toolkit != null", "UAC != null"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), 2*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), DEFAULT_PLOIDY*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
}
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","N>0"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int N) {
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int ploidy) {
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser();
this.samples = new TreeSet<String>(samples);
@ -127,7 +130,8 @@ public class UnifiedGenotyperEngine {
this.verboseWriter = verboseWriter;
this.annotationEngine = engine;
this.N = N;
this.ploidy = ploidy;
this.N = samples.size() * ploidy;
log10AlleleFrequencyPriorsSNPs = new double[N+1];
log10AlleleFrequencyPriorsIndels = new double[N+1];
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);

View File

@ -60,7 +60,7 @@ public class BQSRGathererUnitTest {
* @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated
*/
private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest, int factor) {
for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys
for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys
for (String column : columnsToTest) {
Object actual = calculated.get(primaryKey, column);
Object expected = original.get(primaryKey, column);