added BinaryTagCovariate for ancient dna analysis

This commit is contained in:
Mauricio Carneiro 2012-07-06 15:03:20 -04:00
parent e93b025b39
commit 125e6c1a47
4 changed files with 72 additions and 1 deletions

View File

@ -0,0 +1,61 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
 * Binary covariate that lets BQSR recalibrate on a per-base binary tag stored in the BAM file.
 *
 * The tag name is taken from {@code RecalibrationArgumentCollection.BINARY_TAG_NAME}. For every read the tag
 * must hold one value per base, each being 0 or 1, supplied either as a byte array (fast path) or as a
 * string of '0'/'1' characters (slow path).
 *
 * @author Mauricio Carneiro
 * @since 7/6/12
 */
public class BinaryTagCovariate implements StandardCovariate {

    /** Name of the read attribute holding the binary values; null until a name is supplied by the user. */
    private String tag;

    /**
     * Stores the tag name configured in the argument collection.
     *
     * @param RAC the recalibration arguments; its BINARY_TAG_NAME may be null if the user did not set it
     */
    @Override
    public void initialize(RecalibrationArgumentCollection RAC) {
        tag = RAC.BINARY_TAG_NAME;
    }

    /**
     * Records the binary tag value of every base of the read as the mismatch, insertion and deletion
     * covariate key for that offset.
     *
     * @param read   the read whose binary tag is consulted
     * @param values the covariate holder to populate, one entry per base
     * @throws UserException if no tag name was configured, the read lacks the tag, the tag has an unsupported
     *                       type, holds fewer values than the read has bases, or holds a value other than 0 or 1
     */
    @Override
    public void recordValues(GATKSAMRecord read, ReadCovariates values) {
        // Fail with an actionable message instead of handing a null tag name to the attribute lookup.
        if (tag == null) {
            throw new UserException("The binary tag covariate requires a tag name, please provide one with --binary_tag_name (-bintag)");
        }

        final Object tagObject = read.getAttribute(tag);
        if (tagObject == null) {
            throw new UserException("Read " + read.getReadName() + " does not have the binary tag " + tag);
        }

        final byte[] binaryTag = decodeTag(tagObject);
        final int readLength = read.getReadLength();

        // A short tag would otherwise surface as a bare ArrayIndexOutOfBoundsException in the loop below.
        if (binaryTag.length < readLength) {
            throw new UserException("Binary tag " + tag + " of read " + read.getReadName() + " has " + binaryTag.length
                    + " values but the read has " + readLength + " bases");
        }

        for (int offset = 0; offset < readLength; offset++) {
            final int key = binaryTag[offset];
            // Keys above maximumKeyValue() are not valid for this covariate, so reject them up front.
            if (key < 0 || key > maximumKeyValue()) {
                throw new UserException("Binary tag " + tag + " of read " + read.getReadName() + " has value " + key
                        + " at offset " + offset + ", only 0 and 1 are supported");
            }
            values.addCovariate(key, key, key, offset);
        }
    }

    /**
     * Converts the raw attribute into one numeric value per base.
     *
     * @param tagObject the non-null attribute value read from the record
     * @return the tag values; a byte array is returned as is, a string is decoded digit by digit
     * @throws UserException if the attribute is neither a byte array nor a string of decimal digits
     */
    private byte[] decodeTag(final Object tagObject) {
        if (tagObject instanceof byte[]) {
            return (byte[]) tagObject;
        }
        if (tagObject instanceof String) {
            final String tagString = (String) tagObject;
            final byte[] decoded = new byte[tagString.length()];
            for (int i = 0; i < decoded.length; i++) {
                final char c = tagString.charAt(i);
                if (c < '0' || c > '9') {
                    throw new UserException("Binary tag " + tag + " contains the non-numeric character '" + c + "' at offset " + i);
                }
                // Direct digit arithmetic avoids a substring and a boxed Byte per base.
                decoded[i] = (byte) (c - '0');
            }
            return decoded;
        }
        throw new UserException("Binary tag is not a byte array (fast) or a string (slow). Type not supported");
    }

    /**
     * Parses a key read from the recalibration report.
     *
     * @param str the key in string form
     * @return the key as an Integer
     */
    @Override
    public Object getValue(String str) {
        return Integer.decode(str);
    }

    /**
     * Formats a key for output.
     *
     * @param key the covariate key
     * @return the decimal representation of the key, independent of the default locale
     */
    @Override
    public String formatKey(int key) {
        return String.valueOf(key);
    }

    /**
     * Converts a covariate value back into its key.
     *
     * Accepts both the string form and the numeric form returned by {@link #getValue(String)}, so that the
     * output of getValue can be passed straight back in without a ClassCastException.
     *
     * @param value the value as a String or a Number
     * @return the integer key
     */
    @Override
    public int keyFromValue(Object value) {
        if (value instanceof Number) {
            return ((Number) value).intValue();
        }
        return Integer.decode((String) value);
    }

    /**
     * @return the largest key this covariate can produce, which is 1 because the tag is binary
     */
    @Override
    public int maximumKeyValue() {
        return 1;
    }
}

View File

@ -55,7 +55,7 @@ public interface Covariate {
public void recordValues(final GATKSAMRecord read, final ReadCovariates values);
/**
* Used to get the covariate's value from input csv file during on-the-fly recalibration
* Used to get the covariate's value from input (Recalibration Report) file during on-the-fly recalibration
*
* @param str the key in string type (read from the csv)
* @return the key in its correct type.

View File

@ -153,6 +153,11 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output")
public int QUANTIZING_LEVELS = 16;
/**
* The tag name for the binary tag covariate (if using it)
*/
@Argument(fullName = "binary_tag_name", shortName = "bintag", required = false, doc = "the binary tag covariate name if using it")
public String BINARY_TAG_NAME = null;
@Hidden
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
@ -205,6 +210,8 @@ public class RecalibrationArgumentCollection {
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true);
argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
return argumentsTable;
}

View File

@ -290,6 +290,9 @@ public class RecalibrationReport {
else if (argument.equals("recalibration_report"))
RAC.recalibrationReport = (value == null) ? null : new File((String) value);
else if (argument.equals("binary_tag_name"))
RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;
}
return RAC;