-Push the --use_original_qualities argument into the engine.

-Check that base and qual strings are the same lengths
-Fix one more bug in the clipper.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3006 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-03-16 02:06:11 +00:00
parent 035d4170aa
commit 202231141c
6 changed files with 50 additions and 42 deletions

View File

@ -119,6 +119,10 @@ public class GATKArgumentCollection {
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus (use max_reads_at_locus to stop the engine from reading in all reads)", required = false)
public Integer downsampleCoverage = null;
@Element(required = false)
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
public Boolean useOriginalBaseQualities = false;
@Element(required = false)
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;

View File

@ -43,9 +43,6 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
@Argument(fullName="read", doc="", required=false)
String onlyDoRead = null;
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc="If set, use the original base quality scores", required=false)
boolean USE_ORIGINAL_QUALS = false;
//@Argument(fullName = "keepCompletelyClipped", shortName = "KCC", doc = "Unfortunately, sometimes a read is completely clipped away but with SOFTCLIP_BASES this results in an invalid CIGAR string. ", required = false)
//boolean keepCompletelyClippedReads = false;
@ -55,7 +52,6 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
@Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false)
ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;
private final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
/**
* List of sequence that should be clipped from the reads
@ -257,21 +253,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
private void clipBadQualityScores(ReadClipper clipper) {
SAMRecord read = clipper.getRead();
int readLen = read.getReadBases().length;
byte[] quals = null;
if ( USE_ORIGINAL_QUALS ) {
final Object attr = read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
if ( attr != null ) {
if ( attr instanceof String ) {
quals = QualityUtils.fastqToPhred((String)attr);
} else {
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
}
}
}
// if we don't want original quals or couldn't find them, use the main quals
if ( quals == null )
quals = read.getBaseQualities();
byte[] quals = read.getBaseQualities();
int clipSum = 0, lastMax = -1, clipPoint = -1; // -1 means no clip
@ -399,20 +381,30 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
*/
public void apply(ClippingRepresentation algorithm, SAMRecord clippedRead) {
//clippedRead.setReferenceIndex(1);
byte[] quals = clippedRead.getBaseQualities();
byte[] bases = clippedRead.getReadBases();
switch (algorithm) {
// important note:
// it's not safe to call read.getReadBases()[i] = 'N' or read.getBaseQualities()[i] = 0
// because you're not guaranteed to get a pointer to the actual array of bytes in the SAMRecord
case WRITE_NS:
for (int i = start; i <= stop; i++)
clippedRead.getReadBases()[i] = 'N';
bases[i] = 'N';
clippedRead.setReadBases(bases);
break;
case WRITE_Q0S:
for (int i = start; i <= stop; i++)
clippedRead.getBaseQualities()[i] = 0;
quals[i] = 0;
clippedRead.setBaseQualities(quals);
break;
case WRITE_NS_Q0S:
for (int i = start; i <= stop; i++) {
clippedRead.getReadBases()[i] = 'N';
clippedRead.getBaseQualities()[i] = 0;
bases[i] = 'N';
quals[i] = 0;
}
clippedRead.setReadBases(bases);
clippedRead.setBaseQualities(quals);
break;
case SOFTCLIP_BASES:
if ( ! clippedRead.getReadUnmappedFlag() ) {

View File

@ -201,18 +201,6 @@ public class RecalDataManager {
*/
public static void parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC ) {
// Check if we need to use the original quality scores instead
if( RAC.USE_ORIGINAL_QUALS ) {
final Object attr = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
if( attr != null ) {
if( attr instanceof String ) {
read.setBaseQualities( QualityUtils.fastqToPhred((String)attr) );
} else {
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
}
}
}
SAMReadGroupRecord readGroup = read.getReadGroup();
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments

View File

@ -43,8 +43,6 @@ public class RecalibrationArgumentCollection {
//////////////////////////////////
@Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the outputted covariates table recalibration file")
public String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
public boolean USE_ORIGINAL_QUALS = false;
@Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.")
public String DEFAULT_READ_GROUP = null;
@Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")

View File

@ -5,6 +5,10 @@ import java.util.List;
import java.util.Map;
import net.sf.samtools.*;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
/**
* @author ebanks
@ -41,6 +45,8 @@ public class GATKSAMRecord extends SAMRecord {
// These attributes exist in memory only, and are never written to disk.
private Map<Object, Object> temporaryAttributes;
private final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
public GATKSAMRecord(SAMRecord record) {
super(null); // it doesn't matter - this isn't used
if ( record == null )
@ -118,14 +124,34 @@ public class GATKSAMRecord extends SAMRecord {
}
public byte[] getBaseQualities() {
if ( mQualities == null )
mQualities = mRecord.getBaseQualities();
if ( mQualities == null ) {
if ( GenomeAnalysisEngine.instance.getArguments().useOriginalBaseQualities ) {
final Object attr = mRecord.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
if ( attr != null ) {
if ( attr instanceof String ) {
mQualities = QualityUtils.fastqToPhred((String)attr);
} else {
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", ORIGINAL_QUAL_ATTRIBUTE_TAG, mRecord.getReadName()));
}
}
}
// if we don't want original quals or couldn't find them, use the main quals
if ( mQualities == null )
mQualities = mRecord.getBaseQualities();
// sanity check that the lengths of the base and quality strings are equal
if ( mQualities.length != getReadLength() )
throw new StingException(String.format("Error: the number of base qualities does not match the number of bases in %s (and the GATK does not currently support '*' for the quals)", mRecord.getReadName()));
}
return mQualities;
}
public String getBaseQualityString() {
if ( mQualString == null )
mQualString = mRecord.getBaseQualityString();
if ( mQualString == null ) {
final byte[] quals = getBaseQualities();
if ( quals.length > 0 )
mQualString = StringUtil.bytesToString(quals);
}
return mQualString;
}

View File

@ -50,7 +50,7 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
" -OQ -QT 4" +
" -o %s -ob %s",
2,
Arrays.asList("55c01ccc2e84481b22d3632cdb06c8ba", "f9b1347fabbc33bb24f7c7fa8dfb798b"));
Arrays.asList("55c01ccc2e84481b22d3632cdb06c8ba", "ff7c6edba61307738b34786e14edbef9"));
executeTest("clipOriginalQuals", spec);
}
}