Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
e61e162c81
|
|
@ -60,24 +60,12 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
|
|||
if ( format.equals("") ) {
|
||||
this.format = "%s";
|
||||
this.dataType = GATKReportDataType.Unknown;
|
||||
if ( defaultValue != null ) {
|
||||
this.defaultValue = defaultValue;
|
||||
//this.dataType = GATKReportDataType.fromObject(defaultValue);
|
||||
}
|
||||
else {
|
||||
this.defaultValue = "";
|
||||
//this.dataType = GATKReportDataType.Unknown;
|
||||
}
|
||||
this.defaultValue = (defaultValue != null) ? defaultValue : "";
|
||||
}
|
||||
else {
|
||||
this.format = format;
|
||||
this.dataType = GATKReportDataType.fromFormatString(format);
|
||||
if ( defaultValue == null ) {
|
||||
this.defaultValue = dataType.getDefaultValue();
|
||||
}
|
||||
else {
|
||||
this.defaultValue = defaultValue;
|
||||
}
|
||||
this.defaultValue = (defaultValue != null) ? defaultValue : dataType.getDefaultValue();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -225,8 +213,10 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
|
|||
public Object put(Object key, Object value) {
|
||||
if (value != null) {
|
||||
String formatted = formatValue(value);
|
||||
updateMaxWidth(formatted);
|
||||
updateFormat(formatted);
|
||||
if (!formatted.equals("")) {
|
||||
updateMaxWidth(formatted);
|
||||
updateFormat(formatted);
|
||||
}
|
||||
}
|
||||
return super.put(key, value);
|
||||
}
|
||||
|
|
@ -236,7 +226,7 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
|
|||
}
|
||||
|
||||
private void updateFormat(String formatted) {
|
||||
if (!isRightAlign(formatted))
|
||||
alignment = GATKReportColumnFormat.Alignment.LEFT;
|
||||
if (alignment == GATKReportColumnFormat.Alignment.RIGHT)
|
||||
alignment = isRightAlign(formatted) ? GATKReportColumnFormat.Alignment.RIGHT : GATKReportColumnFormat.Alignment.LEFT;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -313,7 +313,7 @@ public class RecalDataManager {
|
|||
* @param read The read to adjust
|
||||
* @param RAC The list of shared command line arguments
|
||||
*/
|
||||
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
||||
public static void parsePlatformForRead(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
||||
GATKSAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
|
||||
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
|
||||
|
|
@ -337,47 +337,47 @@ public class RecalDataManager {
|
|||
}
|
||||
|
||||
/**
|
||||
* Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space
|
||||
* Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are
|
||||
* inconsistent with the color space. If there is no call in the color space, this method returns true meaning
|
||||
* this read should be skipped
|
||||
*
|
||||
* @param read The SAMRecord to parse
|
||||
* @param strategy the strategy used for SOLID no calls
|
||||
* @param read The SAMRecord to parse
|
||||
* @return whether or not this read should be skipped
|
||||
*/
|
||||
public static void parseColorSpace(final GATKSAMRecord read) {
|
||||
|
||||
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
||||
if (ReadUtils.isSOLiDRead(read)) {
|
||||
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
|
||||
public static boolean checkColorSpace(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
|
||||
if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
||||
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||
if (attr != null) {
|
||||
byte[] colorSpace;
|
||||
if (attr instanceof String) {
|
||||
if (attr instanceof String)
|
||||
colorSpace = ((String) attr).getBytes();
|
||||
}
|
||||
else {
|
||||
else
|
||||
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
|
||||
// Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
|
||||
byte[] readBases = read.getReadBases();
|
||||
if (read.getReadNegativeStrandFlag()) {
|
||||
|
||||
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
|
||||
if (read.getReadNegativeStrandFlag())
|
||||
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
|
||||
}
|
||||
|
||||
final byte[] inconsistency = new byte[readBases.length];
|
||||
int iii;
|
||||
byte prevBase = colorSpace[0]; // The sentinel
|
||||
for (iii = 0; iii < readBases.length; iii++) {
|
||||
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
|
||||
inconsistency[iii] = (byte) (thisBase == readBases[iii] ? 0 : 1);
|
||||
prevBase = readBases[iii];
|
||||
int i;
|
||||
byte prevBase = colorSpace[0]; // The sentinel
|
||||
for (i = 0; i < readBases.length; i++) {
|
||||
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]);
|
||||
inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
|
||||
prevBase = readBases[i];
|
||||
}
|
||||
read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
|
||||
}
|
||||
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
|
||||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||
|
||||
}
|
||||
else {
|
||||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
|
||||
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||
}
|
||||
else
|
||||
return false; // otherwise, just skip the read
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -109,7 +109,6 @@ public class RecalDatum extends RecalDatumOptimized {
|
|||
|
||||
public final void resetCalculatedQualities() {
|
||||
empiricalQuality = 0.0;
|
||||
estimatedQReported = 0.0;
|
||||
}
|
||||
|
||||
private double calcExpectedErrors() {
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ public class RecalibrationReport {
|
|||
GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||
RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list
|
||||
|
||||
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
|
||||
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
|
||||
|
||||
public RecalibrationReport(final File RECAL_FILE) {
|
||||
GATKReport report = new GATKReport(RECAL_FILE);
|
||||
|
|
@ -77,10 +77,11 @@ public class RecalibrationReport {
|
|||
/**
|
||||
* Combines two recalibration reports by adding all observations and errors
|
||||
*
|
||||
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate them
|
||||
* after combining. The reason for not calculating it is because this function is inteded for combining a series of
|
||||
* recalibration reports, and it only makes sense to calculate the empirical qualities and quantized qualities after all
|
||||
* the recalibration reports have been combined. Having the user recalculate when appropriate, makes this method faster
|
||||
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate
|
||||
* them after combining. The reason for not calculating it is because this function is inteded for combining a
|
||||
* series of recalibration reports, and it only makes sense to calculate the empirical qualities and quantized
|
||||
* qualities after all the recalibration reports have been combined. Having the user recalculate when appropriate,
|
||||
* makes this method faster
|
||||
*
|
||||
* Note2: The empirical quality reported, however, is recalculated given its simplicity.
|
||||
*
|
||||
|
|
@ -97,7 +98,7 @@ public class RecalibrationReport {
|
|||
if (thisDatum == null)
|
||||
thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it!
|
||||
else
|
||||
thisDatum.increment(otherDatum); // add the two datum objects into 'this'
|
||||
thisDatum.combine(otherDatum); // add the two datum objects into 'this'
|
||||
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -222,7 +222,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
|
||||
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, 2*samples.size());
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, UnifiedGenotyperEngine.DEFAULT_PLOIDY);
|
||||
|
||||
// initialize the header
|
||||
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@ import java.util.*;
|
|||
|
||||
public class UnifiedGenotyperEngine {
|
||||
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
|
||||
|
||||
public static final int DEFAULT_PLOIDY = 2;
|
||||
|
||||
public enum OUTPUT_MODE {
|
||||
/** produces calls only at variant sites */
|
||||
|
|
@ -95,7 +97,8 @@ public class UnifiedGenotyperEngine {
|
|||
private final Logger logger;
|
||||
private final PrintStream verboseWriter;
|
||||
|
||||
// number of chromosomes (2 * samples) in input
|
||||
// number of chromosomes (ploidy * samples) in input
|
||||
private final int ploidy;
|
||||
private final int N;
|
||||
|
||||
// the standard filter to use for calls below the confidence threshold but above the emit threshold
|
||||
|
|
@ -112,11 +115,11 @@ public class UnifiedGenotyperEngine {
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
@Requires({"toolkit != null", "UAC != null"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), 2*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), DEFAULT_PLOIDY*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
|
||||
}
|
||||
|
||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","N>0"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int N) {
|
||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int ploidy) {
|
||||
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
|
||||
genomeLocParser = toolkit.getGenomeLocParser();
|
||||
this.samples = new TreeSet<String>(samples);
|
||||
|
|
@ -127,7 +130,8 @@ public class UnifiedGenotyperEngine {
|
|||
this.verboseWriter = verboseWriter;
|
||||
this.annotationEngine = engine;
|
||||
|
||||
this.N = N;
|
||||
this.ploidy = ploidy;
|
||||
this.N = samples.size() * ploidy;
|
||||
log10AlleleFrequencyPriorsSNPs = new double[N+1];
|
||||
log10AlleleFrequencyPriorsIndels = new double[N+1];
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public class BQSRGathererUnitTest {
|
|||
* @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated
|
||||
*/
|
||||
private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest, int factor) {
|
||||
for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys
|
||||
for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys
|
||||
for (String column : columnsToTest) {
|
||||
Object actual = calculated.get(primaryKey, column);
|
||||
Object expected = original.get(primaryKey, column);
|
||||
|
|
|
|||
Loading…
Reference in New Issue