Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2012-03-29 12:33:13 -04:00
commit e61e162c81
7 changed files with 54 additions and 60 deletions

View File

@ -60,24 +60,12 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
if ( format.equals("") ) { if ( format.equals("") ) {
this.format = "%s"; this.format = "%s";
this.dataType = GATKReportDataType.Unknown; this.dataType = GATKReportDataType.Unknown;
if ( defaultValue != null ) { this.defaultValue = (defaultValue != null) ? defaultValue : "";
this.defaultValue = defaultValue;
//this.dataType = GATKReportDataType.fromObject(defaultValue);
}
else {
this.defaultValue = "";
//this.dataType = GATKReportDataType.Unknown;
}
} }
else { else {
this.format = format; this.format = format;
this.dataType = GATKReportDataType.fromFormatString(format); this.dataType = GATKReportDataType.fromFormatString(format);
if ( defaultValue == null ) { this.defaultValue = (defaultValue != null) ? defaultValue : dataType.getDefaultValue();
this.defaultValue = dataType.getDefaultValue();
}
else {
this.defaultValue = defaultValue;
}
} }
} }
@ -225,8 +213,10 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
public Object put(Object key, Object value) { public Object put(Object key, Object value) {
if (value != null) { if (value != null) {
String formatted = formatValue(value); String formatted = formatValue(value);
updateMaxWidth(formatted); if (!formatted.equals("")) {
updateFormat(formatted); updateMaxWidth(formatted);
updateFormat(formatted);
}
} }
return super.put(key, value); return super.put(key, value);
} }
@ -236,7 +226,7 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
} }
private void updateFormat(String formatted) { private void updateFormat(String formatted) {
if (!isRightAlign(formatted)) if (alignment == GATKReportColumnFormat.Alignment.RIGHT)
alignment = GATKReportColumnFormat.Alignment.LEFT; alignment = isRightAlign(formatted) ? GATKReportColumnFormat.Alignment.RIGHT : GATKReportColumnFormat.Alignment.LEFT;
} }
} }

View File

@ -313,7 +313,7 @@ public class RecalDataManager {
* @param read The read to adjust * @param read The read to adjust
* @param RAC The list of shared command line arguments * @param RAC The list of shared command line arguments
*/ */
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) { public static void parsePlatformForRead(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
GATKSAMReadGroupRecord readGroup = read.getReadGroup(); GATKSAMReadGroupRecord readGroup = read.getReadGroup();
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) { if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
@ -337,47 +337,47 @@ public class RecalDataManager {
} }
/** /**
* Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space * Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are
* inconsistent with the color space. If there is no call in the color space, this method returns true meaning
* this read should be skipped
* *
* @param read The SAMRecord to parse * @param strategy the strategy used for SOLID no calls
* @param read The SAMRecord to parse
* @return whether or not this read should be skipped
*/ */
public static void parseColorSpace(final GATKSAMRecord read) { public static boolean checkColorSpace(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
if (ReadUtils.isSOLiDRead(read)) {
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) { if (attr != null) {
byte[] colorSpace; byte[] colorSpace;
if (attr instanceof String) { if (attr instanceof String)
colorSpace = ((String) attr).getBytes(); colorSpace = ((String) attr).getBytes();
} else
else {
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
// Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read if (read.getReadNegativeStrandFlag())
byte[] readBases = read.getReadBases();
if (read.getReadNegativeStrandFlag()) {
readBases = BaseUtils.simpleReverseComplement(read.getReadBases()); readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
}
final byte[] inconsistency = new byte[readBases.length]; final byte[] inconsistency = new byte[readBases.length];
int iii; int i;
byte prevBase = colorSpace[0]; // The sentinel byte prevBase = colorSpace[0]; // The sentinel
for (iii = 0; iii < readBases.length; iii++) { for (i = 0; i < readBases.length; i++) {
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]); final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]);
inconsistency[iii] = (byte) (thisBase == readBases[iii] ? 0 : 1); inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
prevBase = readBases[iii]; prevBase = readBases[i];
} }
read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
}
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
} else
else { return false; // otherwise, just skip the read
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
} }
} }
return true;
} }
/** /**

View File

@ -109,7 +109,6 @@ public class RecalDatum extends RecalDatumOptimized {
public final void resetCalculatedQualities() { public final void resetCalculatedQualities() {
empiricalQuality = 0.0; empiricalQuality = 0.0;
estimatedQReported = 0.0;
} }
private double calcExpectedErrors() { private double calcExpectedErrors() {

View File

@ -24,7 +24,7 @@ public class RecalibrationReport {
GATKReportTable argumentTable; // keep the argument table untouched just for output purposes GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code"; private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
public RecalibrationReport(final File RECAL_FILE) { public RecalibrationReport(final File RECAL_FILE) {
GATKReport report = new GATKReport(RECAL_FILE); GATKReport report = new GATKReport(RECAL_FILE);
@ -77,10 +77,11 @@ public class RecalibrationReport {
/** /**
* Combines two recalibration reports by adding all observations and errors * Combines two recalibration reports by adding all observations and errors
* *
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate them * Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate
* after combining. The reason for not calculating it is because this function is inteded for combining a series of * them after combining. The reason for not calculating it is because this function is inteded for combining a
* recalibration reports, and it only makes sense to calculate the empirical qualities and quantized qualities after all * series of recalibration reports, and it only makes sense to calculate the empirical qualities and quantized
* the recalibration reports have been combined. Having the user recalculate when appropriate, makes this method faster * qualities after all the recalibration reports have been combined. Having the user recalculate when appropriate,
* makes this method faster
* *
* Note2: The empirical quality reported, however, is recalculated given its simplicity. * Note2: The empirical quality reported, however, is recalculated given its simplicity.
* *
@ -97,7 +98,7 @@ public class RecalibrationReport {
if (thisDatum == null) if (thisDatum == null)
thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it! thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it!
else else
thisDatum.increment(otherDatum); // add the two datum objects into 'this' thisDatum.combine(otherDatum); // add the two datum objects into 'this'
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
} }
} }

View File

@ -222,7 +222,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP"); verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, 2*samples.size()); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, UnifiedGenotyperEngine.DEFAULT_PLOIDY);
// initialize the header // initialize the header
Set<VCFHeaderLine> headerInfo = getHeaderInfo(); Set<VCFHeaderLine> headerInfo = getHeaderInfo();

View File

@ -51,6 +51,8 @@ import java.util.*;
public class UnifiedGenotyperEngine { public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual"; public static final String LOW_QUAL_FILTER_NAME = "LowQual";
public static final int DEFAULT_PLOIDY = 2;
public enum OUTPUT_MODE { public enum OUTPUT_MODE {
/** produces calls only at variant sites */ /** produces calls only at variant sites */
@ -95,7 +97,8 @@ public class UnifiedGenotyperEngine {
private final Logger logger; private final Logger logger;
private final PrintStream verboseWriter; private final PrintStream verboseWriter;
// number of chromosomes (2 * samples) in input // number of chromosomes (ploidy * samples) in input
private final int ploidy;
private final int N; private final int N;
// the standard filter to use for calls below the confidence threshold but above the emit threshold // the standard filter to use for calls below the confidence threshold but above the emit threshold
@ -112,11 +115,11 @@ public class UnifiedGenotyperEngine {
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
@Requires({"toolkit != null", "UAC != null"}) @Requires({"toolkit != null", "UAC != null"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), 2*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size())); this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), DEFAULT_PLOIDY*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
} }
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","N>0"}) @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int N) { public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int ploidy) {
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser(); genomeLocParser = toolkit.getGenomeLocParser();
this.samples = new TreeSet<String>(samples); this.samples = new TreeSet<String>(samples);
@ -127,7 +130,8 @@ public class UnifiedGenotyperEngine {
this.verboseWriter = verboseWriter; this.verboseWriter = verboseWriter;
this.annotationEngine = engine; this.annotationEngine = engine;
this.N = N; this.ploidy = ploidy;
this.N = samples.size() * ploidy;
log10AlleleFrequencyPriorsSNPs = new double[N+1]; log10AlleleFrequencyPriorsSNPs = new double[N+1];
log10AlleleFrequencyPriorsIndels = new double[N+1]; log10AlleleFrequencyPriorsIndels = new double[N+1];
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity); computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);

View File

@ -60,7 +60,7 @@ public class BQSRGathererUnitTest {
* @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated * @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated
*/ */
private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest, int factor) { private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest, int factor) {
for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys
for (String column : columnsToTest) { for (String column : columnsToTest) {
Object actual = calculated.get(primaryKey, column); Object actual = calculated.get(primaryKey, column);
Object expected = original.get(primaryKey, column); Object expected = original.get(primaryKey, column);