-Push the --use_original_qualities argument into the engine.
-Check that base and qual strings are the same lengths -Fix one more bug in the clipper. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3006 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
035d4170aa
commit
202231141c
|
|
@ -119,6 +119,10 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus (use max_reads_at_locus to stop the engine from reading in all reads)", required = false)
|
||||
public Integer downsampleCoverage = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
|
||||
public Boolean useOriginalBaseQualities = false;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
|
||||
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
|
||||
|
|
|
|||
|
|
@ -43,9 +43,6 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
|
|||
@Argument(fullName="read", doc="", required=false)
|
||||
String onlyDoRead = null;
|
||||
|
||||
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc="If set, use the original base quality scores", required=false)
|
||||
boolean USE_ORIGINAL_QUALS = false;
|
||||
|
||||
//@Argument(fullName = "keepCompletelyClipped", shortName = "KCC", doc = "Unfortunately, sometimes a read is completely clipped away but with SOFTCLIP_BASES this results in an invalid CIGAR string. ", required = false)
|
||||
//boolean keepCompletelyClippedReads = false;
|
||||
|
||||
|
|
@ -55,7 +52,6 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
|
|||
@Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false)
|
||||
ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;
|
||||
|
||||
private final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
|
||||
|
||||
/**
|
||||
* List of sequence that should be clipped from the reads
|
||||
|
|
@ -257,21 +253,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
|
|||
private void clipBadQualityScores(ReadClipper clipper) {
|
||||
SAMRecord read = clipper.getRead();
|
||||
int readLen = read.getReadBases().length;
|
||||
byte[] quals = null;
|
||||
|
||||
if ( USE_ORIGINAL_QUALS ) {
|
||||
final Object attr = read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||
if ( attr != null ) {
|
||||
if ( attr instanceof String ) {
|
||||
quals = QualityUtils.fastqToPhred((String)attr);
|
||||
} else {
|
||||
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
}
|
||||
}
|
||||
// if we don't want original quals or couldn't find them, use the main quals
|
||||
if ( quals == null )
|
||||
quals = read.getBaseQualities();
|
||||
byte[] quals = read.getBaseQualities();
|
||||
|
||||
|
||||
int clipSum = 0, lastMax = -1, clipPoint = -1; // -1 means no clip
|
||||
|
|
@ -399,20 +381,30 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
|
|||
*/
|
||||
public void apply(ClippingRepresentation algorithm, SAMRecord clippedRead) {
|
||||
//clippedRead.setReferenceIndex(1);
|
||||
byte[] quals = clippedRead.getBaseQualities();
|
||||
byte[] bases = clippedRead.getReadBases();
|
||||
|
||||
switch (algorithm) {
|
||||
// important note:
|
||||
// it's not safe to call read.getReadBases()[i] = 'N' or read.getBaseQualities()[i] = 0
|
||||
// because you're not guaranteed to get a pointer to the actual array of bytes in the SAMRecord
|
||||
case WRITE_NS:
|
||||
for (int i = start; i <= stop; i++)
|
||||
clippedRead.getReadBases()[i] = 'N';
|
||||
bases[i] = 'N';
|
||||
clippedRead.setReadBases(bases);
|
||||
break;
|
||||
case WRITE_Q0S:
|
||||
for (int i = start; i <= stop; i++)
|
||||
clippedRead.getBaseQualities()[i] = 0;
|
||||
quals[i] = 0;
|
||||
clippedRead.setBaseQualities(quals);
|
||||
break;
|
||||
case WRITE_NS_Q0S:
|
||||
for (int i = start; i <= stop; i++) {
|
||||
clippedRead.getReadBases()[i] = 'N';
|
||||
clippedRead.getBaseQualities()[i] = 0;
|
||||
bases[i] = 'N';
|
||||
quals[i] = 0;
|
||||
}
|
||||
clippedRead.setReadBases(bases);
|
||||
clippedRead.setBaseQualities(quals);
|
||||
break;
|
||||
case SOFTCLIP_BASES:
|
||||
if ( ! clippedRead.getReadUnmappedFlag() ) {
|
||||
|
|
|
|||
|
|
@ -201,18 +201,6 @@ public class RecalDataManager {
|
|||
*/
|
||||
public static void parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC ) {
|
||||
|
||||
// Check if we need to use the original quality scores instead
|
||||
if( RAC.USE_ORIGINAL_QUALS ) {
|
||||
final Object attr = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||
if( attr != null ) {
|
||||
if( attr instanceof String ) {
|
||||
read.setBaseQualities( QualityUtils.fastqToPhred((String)attr) );
|
||||
} else {
|
||||
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
|
||||
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
|
||||
|
|
|
|||
|
|
@ -43,8 +43,6 @@ public class RecalibrationArgumentCollection {
|
|||
//////////////////////////////////
|
||||
@Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the outputted covariates table recalibration file")
|
||||
public String RECAL_FILE = "output.recal_data.csv";
|
||||
@Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
|
||||
public boolean USE_ORIGINAL_QUALS = false;
|
||||
@Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.")
|
||||
public String DEFAULT_READ_GROUP = null;
|
||||
@Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
||||
|
|
|
|||
|
|
@ -5,6 +5,10 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
|
|
@ -41,6 +45,8 @@ public class GATKSAMRecord extends SAMRecord {
|
|||
// These attributes exist in memory only, and are never written to disk.
|
||||
private Map<Object, Object> temporaryAttributes;
|
||||
|
||||
private final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
|
||||
|
||||
public GATKSAMRecord(SAMRecord record) {
|
||||
super(null); // it doesn't matter - this isn't used
|
||||
if ( record == null )
|
||||
|
|
@ -118,14 +124,34 @@ public class GATKSAMRecord extends SAMRecord {
|
|||
}
|
||||
|
||||
public byte[] getBaseQualities() {
|
||||
if ( mQualities == null )
|
||||
mQualities = mRecord.getBaseQualities();
|
||||
if ( mQualities == null ) {
|
||||
if ( GenomeAnalysisEngine.instance.getArguments().useOriginalBaseQualities ) {
|
||||
final Object attr = mRecord.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||
if ( attr != null ) {
|
||||
if ( attr instanceof String ) {
|
||||
mQualities = QualityUtils.fastqToPhred((String)attr);
|
||||
} else {
|
||||
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", ORIGINAL_QUAL_ATTRIBUTE_TAG, mRecord.getReadName()));
|
||||
}
|
||||
}
|
||||
}
|
||||
// if we don't want original quals or couldn't find them, use the main quals
|
||||
if ( mQualities == null )
|
||||
mQualities = mRecord.getBaseQualities();
|
||||
|
||||
// sanity check that the lengths of the base and quality strings are equal
|
||||
if ( mQualities.length != getReadLength() )
|
||||
throw new StingException(String.format("Error: the number of base qualities does not match the number of bases in %s (and the GATK does not currently support '*' for the quals)", mRecord.getReadName()));
|
||||
}
|
||||
return mQualities;
|
||||
}
|
||||
|
||||
public String getBaseQualityString() {
|
||||
if ( mQualString == null )
|
||||
mQualString = mRecord.getBaseQualityString();
|
||||
if ( mQualString == null ) {
|
||||
final byte[] quals = getBaseQualities();
|
||||
if ( quals.length > 0 )
|
||||
mQualString = StringUtil.bytesToString(quals);
|
||||
}
|
||||
return mQualString;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
|
|||
" -OQ -QT 4" +
|
||||
" -o %s -ob %s",
|
||||
2,
|
||||
Arrays.asList("55c01ccc2e84481b22d3632cdb06c8ba", "f9b1347fabbc33bb24f7c7fa8dfb798b"));
|
||||
Arrays.asList("55c01ccc2e84481b22d3632cdb06c8ba", "ff7c6edba61307738b34786e14edbef9"));
|
||||
executeTest("clipOriginalQuals", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue