-Push the --use_original_qualities argument into the engine.
-Check that base and qual strings are the same lengths -Fix one more bug in the clipper. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3006 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
035d4170aa
commit
202231141c
|
|
@ -119,6 +119,10 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus (use max_reads_at_locus to stop the engine from reading in all reads)", required = false)
|
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus (use max_reads_at_locus to stop the engine from reading in all reads)", required = false)
|
||||||
public Integer downsampleCoverage = null;
|
public Integer downsampleCoverage = null;
|
||||||
|
|
||||||
|
@Element(required = false)
|
||||||
|
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
|
||||||
|
public Boolean useOriginalBaseQualities = false;
|
||||||
|
|
||||||
@Element(required = false)
|
@Element(required = false)
|
||||||
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
|
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
|
||||||
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
|
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
|
||||||
|
|
|
||||||
|
|
@ -43,9 +43,6 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
|
||||||
@Argument(fullName="read", doc="", required=false)
|
@Argument(fullName="read", doc="", required=false)
|
||||||
String onlyDoRead = null;
|
String onlyDoRead = null;
|
||||||
|
|
||||||
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc="If set, use the original base quality scores", required=false)
|
|
||||||
boolean USE_ORIGINAL_QUALS = false;
|
|
||||||
|
|
||||||
//@Argument(fullName = "keepCompletelyClipped", shortName = "KCC", doc = "Unfortunately, sometimes a read is completely clipped away but with SOFTCLIP_BASES this results in an invalid CIGAR string. ", required = false)
|
//@Argument(fullName = "keepCompletelyClipped", shortName = "KCC", doc = "Unfortunately, sometimes a read is completely clipped away but with SOFTCLIP_BASES this results in an invalid CIGAR string. ", required = false)
|
||||||
//boolean keepCompletelyClippedReads = false;
|
//boolean keepCompletelyClippedReads = false;
|
||||||
|
|
||||||
|
|
@ -55,7 +52,6 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
|
||||||
@Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false)
|
@Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false)
|
||||||
ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;
|
ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;
|
||||||
|
|
||||||
private final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List of sequence that should be clipped from the reads
|
* List of sequence that should be clipped from the reads
|
||||||
|
|
@ -257,21 +253,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
|
||||||
private void clipBadQualityScores(ReadClipper clipper) {
|
private void clipBadQualityScores(ReadClipper clipper) {
|
||||||
SAMRecord read = clipper.getRead();
|
SAMRecord read = clipper.getRead();
|
||||||
int readLen = read.getReadBases().length;
|
int readLen = read.getReadBases().length;
|
||||||
byte[] quals = null;
|
byte[] quals = read.getBaseQualities();
|
||||||
|
|
||||||
if ( USE_ORIGINAL_QUALS ) {
|
|
||||||
final Object attr = read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
|
||||||
if ( attr != null ) {
|
|
||||||
if ( attr instanceof String ) {
|
|
||||||
quals = QualityUtils.fastqToPhred((String)attr);
|
|
||||||
} else {
|
|
||||||
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// if we don't want original quals or couldn't find them, use the main quals
|
|
||||||
if ( quals == null )
|
|
||||||
quals = read.getBaseQualities();
|
|
||||||
|
|
||||||
|
|
||||||
int clipSum = 0, lastMax = -1, clipPoint = -1; // -1 means no clip
|
int clipSum = 0, lastMax = -1, clipPoint = -1; // -1 means no clip
|
||||||
|
|
@ -399,20 +381,30 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
|
||||||
*/
|
*/
|
||||||
public void apply(ClippingRepresentation algorithm, SAMRecord clippedRead) {
|
public void apply(ClippingRepresentation algorithm, SAMRecord clippedRead) {
|
||||||
//clippedRead.setReferenceIndex(1);
|
//clippedRead.setReferenceIndex(1);
|
||||||
|
byte[] quals = clippedRead.getBaseQualities();
|
||||||
|
byte[] bases = clippedRead.getReadBases();
|
||||||
|
|
||||||
switch (algorithm) {
|
switch (algorithm) {
|
||||||
|
// important note:
|
||||||
|
// it's not safe to call read.getReadBases()[i] = 'N' or read.getBaseQualities()[i] = 0
|
||||||
|
// because you're not guaranteed to get a pointer to the actual array of bytes in the SAMRecord
|
||||||
case WRITE_NS:
|
case WRITE_NS:
|
||||||
for (int i = start; i <= stop; i++)
|
for (int i = start; i <= stop; i++)
|
||||||
clippedRead.getReadBases()[i] = 'N';
|
bases[i] = 'N';
|
||||||
|
clippedRead.setReadBases(bases);
|
||||||
break;
|
break;
|
||||||
case WRITE_Q0S:
|
case WRITE_Q0S:
|
||||||
for (int i = start; i <= stop; i++)
|
for (int i = start; i <= stop; i++)
|
||||||
clippedRead.getBaseQualities()[i] = 0;
|
quals[i] = 0;
|
||||||
|
clippedRead.setBaseQualities(quals);
|
||||||
break;
|
break;
|
||||||
case WRITE_NS_Q0S:
|
case WRITE_NS_Q0S:
|
||||||
for (int i = start; i <= stop; i++) {
|
for (int i = start; i <= stop; i++) {
|
||||||
clippedRead.getReadBases()[i] = 'N';
|
bases[i] = 'N';
|
||||||
clippedRead.getBaseQualities()[i] = 0;
|
quals[i] = 0;
|
||||||
}
|
}
|
||||||
|
clippedRead.setReadBases(bases);
|
||||||
|
clippedRead.setBaseQualities(quals);
|
||||||
break;
|
break;
|
||||||
case SOFTCLIP_BASES:
|
case SOFTCLIP_BASES:
|
||||||
if ( ! clippedRead.getReadUnmappedFlag() ) {
|
if ( ! clippedRead.getReadUnmappedFlag() ) {
|
||||||
|
|
|
||||||
|
|
@ -201,18 +201,6 @@ public class RecalDataManager {
|
||||||
*/
|
*/
|
||||||
public static void parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC ) {
|
public static void parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC ) {
|
||||||
|
|
||||||
// Check if we need to use the original quality scores instead
|
|
||||||
if( RAC.USE_ORIGINAL_QUALS ) {
|
|
||||||
final Object attr = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
|
||||||
if( attr != null ) {
|
|
||||||
if( attr instanceof String ) {
|
|
||||||
read.setBaseQualities( QualityUtils.fastqToPhred((String)attr) );
|
|
||||||
} else {
|
|
||||||
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
SAMReadGroupRecord readGroup = read.getReadGroup();
|
SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||||
|
|
||||||
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
|
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
|
||||||
|
|
|
||||||
|
|
@ -43,8 +43,6 @@ public class RecalibrationArgumentCollection {
|
||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
@Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the outputted covariates table recalibration file")
|
@Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the outputted covariates table recalibration file")
|
||||||
public String RECAL_FILE = "output.recal_data.csv";
|
public String RECAL_FILE = "output.recal_data.csv";
|
||||||
@Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
|
|
||||||
public boolean USE_ORIGINAL_QUALS = false;
|
|
||||||
@Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.")
|
@Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.")
|
||||||
public String DEFAULT_READ_GROUP = null;
|
public String DEFAULT_READ_GROUP = null;
|
||||||
@Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
@Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,10 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author ebanks
|
* @author ebanks
|
||||||
|
|
@ -41,6 +45,8 @@ public class GATKSAMRecord extends SAMRecord {
|
||||||
// These attributes exist in memory only, and are never written to disk.
|
// These attributes exist in memory only, and are never written to disk.
|
||||||
private Map<Object, Object> temporaryAttributes;
|
private Map<Object, Object> temporaryAttributes;
|
||||||
|
|
||||||
|
private final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
|
||||||
|
|
||||||
public GATKSAMRecord(SAMRecord record) {
|
public GATKSAMRecord(SAMRecord record) {
|
||||||
super(null); // it doesn't matter - this isn't used
|
super(null); // it doesn't matter - this isn't used
|
||||||
if ( record == null )
|
if ( record == null )
|
||||||
|
|
@ -118,14 +124,34 @@ public class GATKSAMRecord extends SAMRecord {
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] getBaseQualities() {
|
public byte[] getBaseQualities() {
|
||||||
if ( mQualities == null )
|
if ( mQualities == null ) {
|
||||||
mQualities = mRecord.getBaseQualities();
|
if ( GenomeAnalysisEngine.instance.getArguments().useOriginalBaseQualities ) {
|
||||||
|
final Object attr = mRecord.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||||
|
if ( attr != null ) {
|
||||||
|
if ( attr instanceof String ) {
|
||||||
|
mQualities = QualityUtils.fastqToPhred((String)attr);
|
||||||
|
} else {
|
||||||
|
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", ORIGINAL_QUAL_ATTRIBUTE_TAG, mRecord.getReadName()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if we don't want original quals or couldn't find them, use the main quals
|
||||||
|
if ( mQualities == null )
|
||||||
|
mQualities = mRecord.getBaseQualities();
|
||||||
|
|
||||||
|
// sanity check that the lengths of the base and quality strings are equal
|
||||||
|
if ( mQualities.length != getReadLength() )
|
||||||
|
throw new StingException(String.format("Error: the number of base qualities does not match the number of bases in %s (and the GATK does not currently support '*' for the quals)", mRecord.getReadName()));
|
||||||
|
}
|
||||||
return mQualities;
|
return mQualities;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getBaseQualityString() {
|
public String getBaseQualityString() {
|
||||||
if ( mQualString == null )
|
if ( mQualString == null ) {
|
||||||
mQualString = mRecord.getBaseQualityString();
|
final byte[] quals = getBaseQualities();
|
||||||
|
if ( quals.length > 0 )
|
||||||
|
mQualString = StringUtil.bytesToString(quals);
|
||||||
|
}
|
||||||
return mQualString;
|
return mQualString;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
|
||||||
" -OQ -QT 4" +
|
" -OQ -QT 4" +
|
||||||
" -o %s -ob %s",
|
" -o %s -ob %s",
|
||||||
2,
|
2,
|
||||||
Arrays.asList("55c01ccc2e84481b22d3632cdb06c8ba", "f9b1347fabbc33bb24f7c7fa8dfb798b"));
|
Arrays.asList("55c01ccc2e84481b22d3632cdb06c8ba", "ff7c6edba61307738b34786e14edbef9"));
|
||||||
executeTest("clipOriginalQuals", spec);
|
executeTest("clipOriginalQuals", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue