Added a new option in the recalibrator to be used by people who have SOLiD data in which only a few of the reads have no-calls in the color space. These reads will be skipped over and left in the bam file untouched.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2857 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b1a4e6d840
commit
7f19ff1fa1
|
|
@ -263,6 +263,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
offset = p.getOffset();
|
offset = p.getOffset();
|
||||||
|
|
||||||
RecalDataManager.parseSAMRecord( read, RAC );
|
RecalDataManager.parseSAMRecord( read, RAC );
|
||||||
|
|
||||||
|
// Skip over reads with no calls in the color space if the user requested it
|
||||||
|
if( !RAC.IGNORE_NOCALL_COLORSPACE || !RecalDataManager.checkNoCallColorSpace( read ) ) {
|
||||||
RecalDataManager.parseColorSpace( read );
|
RecalDataManager.parseColorSpace( read );
|
||||||
|
|
||||||
// Skip if base quality is zero
|
// Skip if base quality is zero
|
||||||
|
|
@ -290,6 +293,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
countedSites++;
|
countedSites++;
|
||||||
} else { // We skipped over the dbSNP site, and we are only processing every Nth locus
|
} else { // We skipped over the dbSNP site, and we are only processing every Nth locus
|
||||||
skippedSites++;
|
skippedSites++;
|
||||||
|
|
|
||||||
|
|
@ -170,7 +170,8 @@ public class RecalDataManager {
|
||||||
final Object val = data.get(comp);
|
final Object val = data.get(comp);
|
||||||
if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps
|
if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps
|
||||||
if( data.keySet().size() == 1) {
|
if( data.keySet().size() == 1) {
|
||||||
data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done in a previous sequential step
|
data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done ...
|
||||||
|
// in a previous step of the sequential calculation model
|
||||||
}
|
}
|
||||||
} else { // Another layer in the nested hash map
|
} else { // Another layer in the nested hash map
|
||||||
checkForSingletons( (Map) val );
|
checkForSingletons( (Map) val );
|
||||||
|
|
@ -358,6 +359,32 @@ public class RecalDataManager {
|
||||||
return originalQualScores;
|
return originalQualScores;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static boolean checkNoCallColorSpace( final SAMRecord read ) {
|
||||||
|
if( read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") ) {
|
||||||
|
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||||
|
if( attr != null ) {
|
||||||
|
char[] colorSpace;
|
||||||
|
if( attr instanceof String ) {
|
||||||
|
colorSpace = ((String)attr).substring(1).toCharArray(); // trim off the Sentinel
|
||||||
|
} else {
|
||||||
|
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||||
|
}
|
||||||
|
|
||||||
|
for( char color : colorSpace ) {
|
||||||
|
if( color != '0' && color != '1' && color != '2' && color != '3' ) {
|
||||||
|
return true; // There is a bad color in this SOLiD read and the user wants to skip over it
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
throw new StingException("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
|
||||||
|
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false; // There aren't any color no calls in this SOLiD read
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Perform the SET_Q_ZERO solid recalibration. Inconsistent color space bases and their previous base are set to quality zero
|
* Perform the SET_Q_ZERO solid recalibration. Inconsistent color space bases and their previous base are set to quality zero
|
||||||
* @param read The SAMRecord to recalibrate
|
* @param read The SAMRecord to recalibrate
|
||||||
|
|
@ -516,7 +543,7 @@ public class RecalDataManager {
|
||||||
// }
|
// }
|
||||||
//}
|
//}
|
||||||
|
|
||||||
} else {
|
} else { // No inconsistency array, so nothing is inconsistent
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,9 @@ public class RecalibrationArgumentCollection {
|
||||||
public int HOMOPOLYMER_NBACK = 7;
|
public int HOMOPOLYMER_NBACK = 7;
|
||||||
@Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false)
|
@Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false)
|
||||||
public boolean EXCEPTION_IF_NO_TILE = false;
|
public boolean EXCEPTION_IF_NO_TILE = false;
|
||||||
|
@Argument(fullName = "ignore_nocall_colorspace", shortName="ignore_nocall_colorspace", doc="If provided, the recalibrator will skip over reads with no calls in the color space instead of halting with an exception", required=false)
|
||||||
|
public boolean IGNORE_NOCALL_COLORSPACE = false;
|
||||||
|
|
||||||
|
|
||||||
public final boolean checkSolidRecalMode() {
|
public final boolean checkSolidRecalMode() {
|
||||||
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
|
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
|
||||||
|
|
|
||||||
|
|
@ -98,6 +98,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
||||||
private static final long RANDOM_SEED = 1032861495;
|
private static final long RANDOM_SEED = 1032861495;
|
||||||
private final Random coinFlip = new Random( RANDOM_SEED ); // Random number generator is used to remove reference bias in solid bams
|
private final Random coinFlip = new Random( RANDOM_SEED ); // Random number generator is used to remove reference bias in solid bams
|
||||||
|
private long numReadsWithMalformedColorSpace = 0;
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -300,6 +301,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
|
|
||||||
final String platform = read.getReadGroup().getPlatform();
|
final String platform = read.getReadGroup().getPlatform();
|
||||||
if( platform.toUpperCase().contains("SOLID") && !RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") ) {
|
if( platform.toUpperCase().contains("SOLID") && !RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") ) {
|
||||||
|
if( RAC.IGNORE_NOCALL_COLORSPACE ) {
|
||||||
|
final boolean badColor = RecalDataManager.checkNoCallColorSpace( read );
|
||||||
|
if( badColor ) {
|
||||||
|
numReadsWithMalformedColorSpace++;
|
||||||
|
return read; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them
|
||||||
|
}
|
||||||
|
}
|
||||||
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases );
|
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -452,5 +460,11 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
* @param output The SAMFileWriter that outputs the bam file
|
* @param output The SAMFileWriter that outputs the bam file
|
||||||
*/
|
*/
|
||||||
public void onTraversalDone(SAMFileWriter output) {
|
public void onTraversalDone(SAMFileWriter output) {
|
||||||
|
if( numReadsWithMalformedColorSpace != 0 ) {
|
||||||
|
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
|
||||||
|
"because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
|
||||||
|
"for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
|
||||||
|
"These reads remain in the output bam file but haven't been corrected for reference bias. Use at your own risk.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -182,6 +182,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
" -I " + bam +
|
" -I " + bam +
|
||||||
" -L 1:10,000,000-10,200,000" +
|
" -L 1:10,000,000-10,200,000" +
|
||||||
" -standard" +
|
" -standard" +
|
||||||
|
" --ignore_nocall_colorspace" +
|
||||||
" --solid_recal_mode SET_Q_ZERO" +
|
" --solid_recal_mode SET_Q_ZERO" +
|
||||||
" -recalFile %s",
|
" -recalFile %s",
|
||||||
1, // just one output file
|
1, // just one output file
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue