Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
e61e162c81
|
|
@ -60,24 +60,12 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
|
||||||
if ( format.equals("") ) {
|
if ( format.equals("") ) {
|
||||||
this.format = "%s";
|
this.format = "%s";
|
||||||
this.dataType = GATKReportDataType.Unknown;
|
this.dataType = GATKReportDataType.Unknown;
|
||||||
if ( defaultValue != null ) {
|
this.defaultValue = (defaultValue != null) ? defaultValue : "";
|
||||||
this.defaultValue = defaultValue;
|
|
||||||
//this.dataType = GATKReportDataType.fromObject(defaultValue);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
this.defaultValue = "";
|
|
||||||
//this.dataType = GATKReportDataType.Unknown;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this.format = format;
|
this.format = format;
|
||||||
this.dataType = GATKReportDataType.fromFormatString(format);
|
this.dataType = GATKReportDataType.fromFormatString(format);
|
||||||
if ( defaultValue == null ) {
|
this.defaultValue = (defaultValue != null) ? defaultValue : dataType.getDefaultValue();
|
||||||
this.defaultValue = dataType.getDefaultValue();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
this.defaultValue = defaultValue;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -225,8 +213,10 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
|
||||||
public Object put(Object key, Object value) {
|
public Object put(Object key, Object value) {
|
||||||
if (value != null) {
|
if (value != null) {
|
||||||
String formatted = formatValue(value);
|
String formatted = formatValue(value);
|
||||||
updateMaxWidth(formatted);
|
if (!formatted.equals("")) {
|
||||||
updateFormat(formatted);
|
updateMaxWidth(formatted);
|
||||||
|
updateFormat(formatted);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return super.put(key, value);
|
return super.put(key, value);
|
||||||
}
|
}
|
||||||
|
|
@ -236,7 +226,7 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void updateFormat(String formatted) {
|
private void updateFormat(String formatted) {
|
||||||
if (!isRightAlign(formatted))
|
if (alignment == GATKReportColumnFormat.Alignment.RIGHT)
|
||||||
alignment = GATKReportColumnFormat.Alignment.LEFT;
|
alignment = isRightAlign(formatted) ? GATKReportColumnFormat.Alignment.RIGHT : GATKReportColumnFormat.Alignment.LEFT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -313,7 +313,7 @@ public class RecalDataManager {
|
||||||
* @param read The read to adjust
|
* @param read The read to adjust
|
||||||
* @param RAC The list of shared command line arguments
|
* @param RAC The list of shared command line arguments
|
||||||
*/
|
*/
|
||||||
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
public static void parsePlatformForRead(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
||||||
GATKSAMReadGroupRecord readGroup = read.getReadGroup();
|
GATKSAMReadGroupRecord readGroup = read.getReadGroup();
|
||||||
|
|
||||||
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
|
if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
|
||||||
|
|
@ -337,47 +337,47 @@ public class RecalDataManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space
|
* Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are
|
||||||
|
* inconsistent with the color space. If there is no call in the color space, this method returns true meaning
|
||||||
|
* this read should be skipped
|
||||||
*
|
*
|
||||||
* @param read The SAMRecord to parse
|
* @param strategy the strategy used for SOLID no calls
|
||||||
|
* @param read The SAMRecord to parse
|
||||||
|
* @return whether or not this read should be skipped
|
||||||
*/
|
*/
|
||||||
public static void parseColorSpace(final GATKSAMRecord read) {
|
public static boolean checkColorSpace(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
|
||||||
|
if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
||||||
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
|
||||||
if (ReadUtils.isSOLiDRead(read)) {
|
|
||||||
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
|
|
||||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||||
if (attr != null) {
|
if (attr != null) {
|
||||||
byte[] colorSpace;
|
byte[] colorSpace;
|
||||||
if (attr instanceof String) {
|
if (attr instanceof String)
|
||||||
colorSpace = ((String) attr).getBytes();
|
colorSpace = ((String) attr).getBytes();
|
||||||
}
|
else
|
||||||
else {
|
|
||||||
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||||
}
|
|
||||||
|
byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
|
||||||
// Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
|
if (read.getReadNegativeStrandFlag())
|
||||||
byte[] readBases = read.getReadBases();
|
|
||||||
if (read.getReadNegativeStrandFlag()) {
|
|
||||||
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
|
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
|
||||||
}
|
|
||||||
final byte[] inconsistency = new byte[readBases.length];
|
final byte[] inconsistency = new byte[readBases.length];
|
||||||
int iii;
|
int i;
|
||||||
byte prevBase = colorSpace[0]; // The sentinel
|
byte prevBase = colorSpace[0]; // The sentinel
|
||||||
for (iii = 0; iii < readBases.length; iii++) {
|
for (i = 0; i < readBases.length; i++) {
|
||||||
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
|
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]);
|
||||||
inconsistency[iii] = (byte) (thisBase == readBases[iii] ? 0 : 1);
|
inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
|
||||||
prevBase = readBases[iii];
|
prevBase = readBases[i];
|
||||||
}
|
}
|
||||||
read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
|
read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
|
||||||
|
}
|
||||||
|
else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it
|
||||||
|
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||||
|
|
||||||
}
|
else
|
||||||
else {
|
return false; // otherwise, just skip the read
|
||||||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
|
|
||||||
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -109,7 +109,6 @@ public class RecalDatum extends RecalDatumOptimized {
|
||||||
|
|
||||||
public final void resetCalculatedQualities() {
|
public final void resetCalculatedQualities() {
|
||||||
empiricalQuality = 0.0;
|
empiricalQuality = 0.0;
|
||||||
estimatedQReported = 0.0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calcExpectedErrors() {
|
private double calcExpectedErrors() {
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ public class RecalibrationReport {
|
||||||
GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||||
RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list
|
RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list
|
||||||
|
|
||||||
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
|
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
|
||||||
|
|
||||||
public RecalibrationReport(final File RECAL_FILE) {
|
public RecalibrationReport(final File RECAL_FILE) {
|
||||||
GATKReport report = new GATKReport(RECAL_FILE);
|
GATKReport report = new GATKReport(RECAL_FILE);
|
||||||
|
|
@ -77,10 +77,11 @@ public class RecalibrationReport {
|
||||||
/**
|
/**
|
||||||
* Combines two recalibration reports by adding all observations and errors
|
* Combines two recalibration reports by adding all observations and errors
|
||||||
*
|
*
|
||||||
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate them
|
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate
|
||||||
* after combining. The reason for not calculating it is because this function is inteded for combining a series of
|
* them after combining. The reason for not calculating it is because this function is inteded for combining a
|
||||||
* recalibration reports, and it only makes sense to calculate the empirical qualities and quantized qualities after all
|
* series of recalibration reports, and it only makes sense to calculate the empirical qualities and quantized
|
||||||
* the recalibration reports have been combined. Having the user recalculate when appropriate, makes this method faster
|
* qualities after all the recalibration reports have been combined. Having the user recalculate when appropriate,
|
||||||
|
* makes this method faster
|
||||||
*
|
*
|
||||||
* Note2: The empirical quality reported, however, is recalculated given its simplicity.
|
* Note2: The empirical quality reported, however, is recalculated given its simplicity.
|
||||||
*
|
*
|
||||||
|
|
@ -97,7 +98,7 @@ public class RecalibrationReport {
|
||||||
if (thisDatum == null)
|
if (thisDatum == null)
|
||||||
thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it!
|
thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it!
|
||||||
else
|
else
|
||||||
thisDatum.increment(otherDatum); // add the two datum objects into 'this'
|
thisDatum.combine(otherDatum); // add the two datum objects into 'this'
|
||||||
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
|
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -222,7 +222,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
|
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
|
||||||
|
|
||||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, 2*samples.size());
|
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, UnifiedGenotyperEngine.DEFAULT_PLOIDY);
|
||||||
|
|
||||||
// initialize the header
|
// initialize the header
|
||||||
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
|
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,8 @@ import java.util.*;
|
||||||
|
|
||||||
public class UnifiedGenotyperEngine {
|
public class UnifiedGenotyperEngine {
|
||||||
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
|
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
|
||||||
|
|
||||||
|
public static final int DEFAULT_PLOIDY = 2;
|
||||||
|
|
||||||
public enum OUTPUT_MODE {
|
public enum OUTPUT_MODE {
|
||||||
/** produces calls only at variant sites */
|
/** produces calls only at variant sites */
|
||||||
|
|
@ -95,7 +97,8 @@ public class UnifiedGenotyperEngine {
|
||||||
private final Logger logger;
|
private final Logger logger;
|
||||||
private final PrintStream verboseWriter;
|
private final PrintStream verboseWriter;
|
||||||
|
|
||||||
// number of chromosomes (2 * samples) in input
|
// number of chromosomes (ploidy * samples) in input
|
||||||
|
private final int ploidy;
|
||||||
private final int N;
|
private final int N;
|
||||||
|
|
||||||
// the standard filter to use for calls below the confidence threshold but above the emit threshold
|
// the standard filter to use for calls below the confidence threshold but above the emit threshold
|
||||||
|
|
@ -112,11 +115,11 @@ public class UnifiedGenotyperEngine {
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
@Requires({"toolkit != null", "UAC != null"})
|
@Requires({"toolkit != null", "UAC != null"})
|
||||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), 2*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
|
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), DEFAULT_PLOIDY*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","N>0"})
|
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})
|
||||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int N) {
|
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int ploidy) {
|
||||||
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
|
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
|
||||||
genomeLocParser = toolkit.getGenomeLocParser();
|
genomeLocParser = toolkit.getGenomeLocParser();
|
||||||
this.samples = new TreeSet<String>(samples);
|
this.samples = new TreeSet<String>(samples);
|
||||||
|
|
@ -127,7 +130,8 @@ public class UnifiedGenotyperEngine {
|
||||||
this.verboseWriter = verboseWriter;
|
this.verboseWriter = verboseWriter;
|
||||||
this.annotationEngine = engine;
|
this.annotationEngine = engine;
|
||||||
|
|
||||||
this.N = N;
|
this.ploidy = ploidy;
|
||||||
|
this.N = samples.size() * ploidy;
|
||||||
log10AlleleFrequencyPriorsSNPs = new double[N+1];
|
log10AlleleFrequencyPriorsSNPs = new double[N+1];
|
||||||
log10AlleleFrequencyPriorsIndels = new double[N+1];
|
log10AlleleFrequencyPriorsIndels = new double[N+1];
|
||||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
|
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ public class BQSRGathererUnitTest {
|
||||||
* @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated
|
* @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated
|
||||||
*/
|
*/
|
||||||
private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest, int factor) {
|
private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest, int factor) {
|
||||||
for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys
|
for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys
|
||||||
for (String column : columnsToTest) {
|
for (String column : columnsToTest) {
|
||||||
Object actual = calculated.get(primaryKey, column);
|
Object actual = calculated.get(primaryKey, column);
|
||||||
Object expected = original.get(primaryKey, column);
|
Object expected = original.get(primaryKey, column);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue