Moved private walkers into the appropriate directories under the private gatk.walkers package. Moved a few public walkers into the private qc package, and some private qc walkers into the public directory. Removed several obviously broken and/or unused walkers.

This commit is contained in:
Mark A. DePristo 2011-06-30 14:59:58 -04:00
parent f4ae6edb92
commit defa3cfe85
6 changed files with 373 additions and 461 deletions

View File

@ -0,0 +1,143 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.commandline.Argument;
import java.io.File;
import java.util.*;
/**
 * Read walker that counts how much realignment the IndelRealigner actually performed,
 * by scanning reads inside the target intervals (from RealignerTargetCreator) for the
 * "OC" (original cigar) tag that the realigner writes onto moved reads.
 *
 * On completion it prints the number of intervals updated, reads updated, and an
 * estimate of affected reference bases.
 */
@By(DataSource.READS)
// walker to count realigned reads
public class RealignedReadCounter extends ReadWalker<Integer, Integer> {
    // tag the realigner writes holding the read's pre-realignment cigar
    public static final String ORIGINAL_CIGAR_TAG = "OC";
    // tag the realigner writes holding the read's pre-realignment alignment start
    public static final String ORIGINAL_POSITION_TAG = "OP";

    @Argument(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true)
    protected String intervalsFile = null;

    // the intervals input by the user
    private Iterator<GenomeLoc> intervals = null;
    // the current interval in the list
    private GenomeLoc currentInterval = null;
    // running totals reported by onTraversalDone()
    private long updatedIntervals = 0, updatedReads = 0, affectedBases = 0;
    // set once the current interval has been counted, so each interval is counted at most once
    private boolean intervalWasUpdated = false;

    /**
     * Opens the interval file as a lazy, merging iterator and primes the first interval.
     * Requires the interval file to be coordinate-sorted.
     */
    public void initialize() {
        // prepare to read intervals one-by-one, as needed (assuming they are sorted).
        intervals = new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
        currentInterval = intervals.hasNext() ? intervals.next() : null;
    }

    /**
     * Inspects one read. If it overlaps the current target interval, is cleanable, and
     * carries an OC tag whose value differs from the current cigar, it is counted as a
     * realigned read (and its interval counted once). Reads past the current interval
     * advance the interval iterator.
     *
     * @param read            the read being traversed
     * @param ref             reference context for the read
     * @param metaDataTracker unused here
     * @return always 0; the walker accumulates its statistics in fields instead
     */
    public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
        if ( currentInterval == null ) {
            return 0;
        }
        GenomeLoc readLoc = ref.getGenomeLocParser().createGenomeLoc(read);
        // hack to get around unmapped reads having screwy locations
        if ( readLoc.getStop() == 0 )
            readLoc = ref.getGenomeLocParser().createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart());
        // reads before the interval can't touch it; 454 reads are skipped entirely
        if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) )
            return 0;
        if ( readLoc.overlapsP(currentInterval) ) {
            // mirror the realigner's own filters: reads it would never clean don't count
            if ( doNotTryToClean(read) )
                return 0;
            if ( read.getAttribute(ORIGINAL_CIGAR_TAG) != null ) {
                String newCigar = (String)read.getAttribute(ORIGINAL_CIGAR_TAG);
                // deal with an old bug
                if ( read.getCigar().toString().equals(newCigar) ) {
                    //System.out.println(currentInterval + ": " + read.getReadName() + " " + read.getCigarString() + " " + newCigar);
                    return 0;
                }
                if ( !intervalWasUpdated ) {
                    intervalWasUpdated = true;
                    updatedIntervals++;
                    // NOTE(review): 20 looks like a fixed flank-size estimate per interval — confirm
                    affectedBases += 20 + getIndelSize(read);
                }
                updatedReads++;
            }
        } else {
            // read is past the current interval: advance until we reach one at/after the read
            do {
                intervalWasUpdated = false;
                currentInterval = intervals.hasNext() ? intervals.next() : null;
            } while ( currentInterval != null && currentInterval.isBefore(readLoc) );
        }
        return 0;
    }

    /**
     * Returns the length of the first deletion in the read's cigar. Insertions yield 0
     * (they consume no reference bases) and stop the scan. Warns only when the cigar
     * contains neither an insertion nor a deletion.
     */
    private int getIndelSize(SAMRecord read) {
        for ( CigarElement ce : read.getCigar().getCigarElements() ) {
            if ( ce.getOperator() == CigarOperator.I )
                return 0;
            if ( ce.getOperator() == CigarOperator.D )
                return ce.getLength();
        }
        logger.warn("We didn't see an indel for this read: " + read.getReadName() + " " + read.getAlignmentStart() + " " + read.getCigar());
        return 0;
    }

    /**
     * Returns true for reads the realigner itself would never attempt to clean:
     * unmapped, secondary, vendor-failed, MQ0, start-less, or with a bad mate.
     */
    private boolean doNotTryToClean(SAMRecord read) {
        return read.getReadUnmappedFlag() ||
                read.getNotPrimaryAlignmentFlag() ||
                read.getReadFailsVendorQualityCheckFlag() ||
                read.getMappingQuality() == 0 ||
                read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ||
                (BadMateFilter.hasBadMate(read));
    }

    /** Reduce starts at zero; map always returns 0, so the traversal sum stays 0. */
    public Integer reduceInit() {
        return 0;
    }

    public Integer reduce(Integer value, Integer sum) {
        return sum + value;
    }

    /** Prints the accumulated realignment statistics. */
    public void onTraversalDone(Integer result) {
        System.out.println(updatedIntervals + " intervals were updated");
        System.out.println(updatedReads + " reads were updated");
        System.out.println(affectedBases + " bases were affected");
    }
}

View File

@ -0,0 +1,62 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.List;
import java.io.PrintStream;
/**
 * Counts the number of contiguous regions the walker traverses over. Slower than it needs to be, but
 * very useful since overlapping intervals get merged, so you can count the number of intervals the GATK merges down to.
 * This was its very first use.
 */
public class CountIntervals extends RefWalker<Long, Long> {
    @Output
    PrintStream out;

    @Argument(fullName="numOverlaps",shortName="no",doc="Count all occurrences of X or more overlapping intervals; defaults to 2", required=false)
    int numOverlaps = 2;

    /** Each per-interval reduction starts from zero. */
    public Long reduceInit() {
        return 0L;
    }

    /** Results are reduced per merged interval rather than over the whole traversal. */
    public boolean isReduceByInterval() { return true; }

    /**
     * Emits the number of "check" features overlapping this locus, or null when
     * there is no ROD data at the site.
     */
    public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( tracker == null )
            return null;
        final List<GATKFeature> overlappingFeatures = tracker.getGATKFeatureMetaData("check", false);
        return Long.valueOf(overlappingFeatures.size());
    }

    /** Tracks the maximum per-locus overlap count seen so far within the interval. */
    public Long reduce(Long loc, Long prev) {
        return loc == null ? 0L : Math.max(prev, loc);
    }

    /**
     * Counts the intervals whose maximum overlap reached the numOverlaps threshold
     * and prints the total.
     */
    public void onTraversalDone(List<Pair<GenomeLoc, Long>> finalReduce) {
        long qualifyingIntervals = 0;
        for ( final Pair<GenomeLoc, Long> intervalAndMax : finalReduce ) {
            if ( intervalAndMax.second >= numOverlaps )
                qualifyingIntervals++;
        }
        out.printf("Number of contiguous intervals: %d", qualifyingIntervals);
    }
}

View File

@ -1,157 +0,0 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SimpleTimer;
import java.io.PrintStream;
import java.io.File;
import java.io.FileInputStream;
import java.util.*;
/**
 * Profiles the raw cost of reading and decoding a ROD (e.g. VCF) file. For each
 * iteration it times successively deeper levels of processing -- raw byte reads,
 * line reads, field splitting, location-only decoding and full decoding -- and
 * finally reports the walltime of the standard GATK traversal itself.
 */
@Requires(value={})
public class ProfileRodSystem extends RodWalker<Integer, Integer> {
    @Output(doc="File to which results should be written",required=true)
    protected PrintStream out;

    @Argument(fullName="nIterations", shortName="N", doc="Number of raw reading iterations to perform", required=false)
    int nIterations = 1;

    @Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false)
    int MAX_RECORDS = -1;

    // single timer reused for every measurement; restarted before each one
    SimpleTimer timer = new SimpleTimer("myTimer");

    /**
     * Runs the raw file-reading benchmarks up front, then starts the timer that
     * measures the GATK traversal itself (reported in onTraversalDone).
     */
    public void initialize() {
        File rodFile = getRodFile();
        out.printf("# walltime is in seconds%n");
        out.printf("# file is %s%n", rodFile);
        out.printf("# file size is %d bytes%n", rodFile.length());
        out.printf("operation\titeration\twalltime%n");
        for ( int i = 0; i < nIterations; i++ ) {
            out.printf("read.bytes\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.BY_BYTE));
            out.printf("read.line\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.BY_LINE));
            out.printf("line.and.parts\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.BY_PARTS));
            out.printf("decode.loc\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.DECODE_LOC));
            out.printf("full.decode\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.DECODE));
        }
        timer.start(); // start up timer for map itself
    }

    // levels of processing applied to the file, from cheapest to most expensive
    private enum ReadMode { BY_BYTE, BY_LINE, BY_PARTS, DECODE_LOC, DECODE };

    /**
     * Reads the whole file once in the requested mode and returns the elapsed
     * walltime in seconds. MAX_RECORDS == -1 means no record limit.
     *
     * @param f    the file to read
     * @param mode how much processing to apply to each record
     * @return elapsed seconds for this pass
     * @throws RuntimeException wrapping any I/O or decoding failure
     */
    private final double readFile(File f, ReadMode mode) {
        timer.start();
        FileInputStream s = null; // closed in the finally block below (was previously leaked)
        try {
            byte[] data = new byte[100000];
            s = new FileInputStream(f);
            if ( mode == ReadMode.BY_BYTE ) {
                while (true) {
                    if ( s.read(data) == -1 )
                        break;
                }
            } else {
                int counter = 0;
                VCFCodec codec = new VCFCodec();
                String[] parts = new String[100000];
                AsciiLineReader lineReader = new AsciiLineReader(s);
                // decoding modes need the header parsed first so record decoding works
                if ( mode == ReadMode.DECODE_LOC || mode == ReadMode.DECODE )
                    codec.readHeader(lineReader);
                while (counter++ < MAX_RECORDS || MAX_RECORDS == -1) {
                    String line = lineReader.readLine();
                    if ( line == null )
                        break;
                    else if ( mode == ReadMode.BY_PARTS ) {
                        ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR);
                    }
                    else if ( mode == ReadMode.DECODE_LOC ) {
                        codec.decodeLoc(line);
                    }
                    else if ( mode == ReadMode.DECODE ) {
                        processOneVC((VariantContext)codec.decode(line));
                    }
                }
            }
        } catch ( Exception e ) {
            throw new RuntimeException(e);
        } finally {
            // fix: the stream used to leak (one descriptor per benchmark pass)
            if ( s != null ) {
                try {
                    s.close();
                } catch ( Exception e ) {
                    // best-effort close; the read either succeeded or already threw above
                }
            }
        }
        return timer.getElapsedTime();
    }

    /** Returns the first ROD file bound to the engine (the file being profiled). */
    private File getRodFile() {
        List<ReferenceOrderedDataSource> rods = this.getToolkit().getRodDataSources();
        ReferenceOrderedDataSource rod = rods.get(0);
        return rod.getFile();
    }

    /**
     * Standard traversal map: fully parse the variant at this site so the
     * engine-level timing includes sample parsing.
     */
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( tracker == null ) // RodWalkers can make funky map calls
            return 0;
        VariantContext vc = tracker.getVariantContext(ref, "rod", context.getLocation());
        processOneVC(vc);
        return 0;
    }

    // force full parsing of a variant context (genotypes are lazily decoded)
    private static final void processOneVC(VariantContext vc) {
        vc.getNSamples(); // force us to parse the samples
    }

    public Integer reduceInit() {
        return 0;
    }

    public Integer reduce(Integer counter, Integer sum) {
        return counter + sum;
    }

    /** Reports the walltime of the engine traversal started at the end of initialize(). */
    public void onTraversalDone(Integer sum) {
        out.printf("gatk.traversal\t%d\t%.2f%n", 0, timer.getElapsedTime());
    }
}

View File

@ -0,0 +1,153 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.*;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Collection;
import java.util.List;
/**
 * a walker for validating (in the style of validating pile-up) the ROD system.
 *
 * Emits an MD5 per input ROD file in the header (so changed inputs are detectable),
 * then one line per site listing which RODs were seen there. Optionally checks that
 * all variant records at a site are equivalent.
 */
@Reference(window=@Window(start=-40,stop=40))
public class RodSystemValidationWalker extends RodWalker<Integer,Integer> {
    // the divider to use in some of the text output
    private static final String DIVIDER = ",";

    @Output
    public PrintStream out;

    @Argument(fullName="PerLocusEqual",required=false,doc="Should we check that all records at the same site produce equivalent variant contexts")
    public boolean allRecordsVariantContextEquivalent = false;

    // used to calculate the MD5 of a file
    MessageDigest digest = null;

    // we sometimes need to know what rods the engine's seen
    List<ReferenceOrderedDataSource> rodList;

    /**
     * emit the md5 sums for each of the input ROD files (will save up a lot of time if and when the ROD files change
     * underneath us).
     */
    public void initialize() {
        // setup the MD5-er
        try {
            digest = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            throw new ReviewedStingException("Unable to find MD5 checksumer");
        }
        out.println("Header:");
        // enumerate the list of ROD's we've loaded
        rodList = this.getToolkit().getRodDataSources();
        for (ReferenceOrderedDataSource rod : rodList) {
            out.println(rod.getName() + DIVIDER + rod.getType());
            out.println(rod.getName() + DIVIDER + rod.getFile());
            out.println(rod.getName() + DIVIDER + md5sum(rod.getFile()));
        }
        out.println("Data:");
    }

    /**
     *
     * @param tracker the ref meta data tracker to get RODs
     * @param ref reference context
     * @param context the reads
     * @return an 1 for each site with a rod(s), 0 otherwise
     */
    @Override
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        int ret = 0;
        if (tracker != null && tracker.getAllRods().size() > 0) {
            out.print(context.getLocation() + DIVIDER);
            Collection<GATKFeature> features = tracker.getAllRods();
            for (GATKFeature feat : features)
                out.print(feat.getName() + DIVIDER);
            out.println(";");
            ret++;
        }
        // if the argument was set, check for equivalence
        if (allRecordsVariantContextEquivalent && tracker != null) {
            Collection<VariantContext> col = tracker.getAllVariantContexts(ref);
            VariantContext con = null;
            for (VariantContext contextInList : col)
                if (con == null) con = contextInList;
                // BUGFIX: this previously compared con against the whole Collection
                // (con.equals(col)), which is never true, so every multi-record site
                // was reported as a failure. Compare record-to-record instead.
                else if (!con.equals(contextInList)) out.println("FAIL: context " + contextInList + " doesn't match " + con);
        }
        return ret;
    }

    /**
     * Provide an initial value for reduce computations.
     *
     * @return Initial value of reduce.
     */
    @Override
    public Integer reduceInit() {
        return 0;
    }

    /**
     * Reduces a single map with the accumulator provided as the ReduceType.
     *
     * @param value result of the map.
     * @param sum accumulator for the reduce.
     * @return accumulator with result of the map taken into account.
     */
    @Override
    public Integer reduce(Integer value, Integer sum) {
        return value + sum;
    }

    @Override
    public void onTraversalDone(Integer result) {
        // Double check traversal result to make count is the same.
        // TODO: Is this check necessary?
        out.println("[REDUCE RESULT] Traversal result is: " + result);
    }

    /**
     * Computes the hex MD5 of a file's contents, or returns "Not a file" if it
     * cannot be opened. Note digest() resets the shared MessageDigest, so the
     * field can be safely reused across files.
     *
     * @param f the file to checksum
     * @return lowercase hex MD5, or "Not a file"
     */
    // shamelessly absconded and adapted from http://www.javalobby.org/java/forums/t84420.html
    private String md5sum(File f) {
        InputStream is;
        try {
            is = new FileInputStream(f);
        } catch (FileNotFoundException e) {
            return "Not a file";
        }
        byte[] buffer = new byte[8192];
        int read = 0;
        try {
            while ((read = is.read(buffer)) > 0) {
                digest.update(buffer, 0, read);
            }
            byte[] md5sum = digest.digest();
            BigInteger bigInt = new BigInteger(1, md5sum);
            return bigInt.toString(16);
        }
        catch (IOException e) {
            throw new RuntimeException("Unable to process file for MD5", e);
        }
        finally {
            try {
                is.close();
            }
            catch (IOException e) {
                throw new RuntimeException("Unable to close input stream for MD5 calculation", e);
            }
        }
    }
}

View File

@ -1,298 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.Argument;
import java.io.PrintStream;
/**
 * Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
 * Can also count the number of reads matching a given criterion using read filters (see the
 * --read-filter command line argument). Simplest example of a read-backed analysis.
 */
// NOTE(review): despite the javadoc above, this walker validates the BAQ implementation by
// comparing BQ-tag-derived qualities against the HMM recalculation, read by read. It can
// also profile the BAQ calculation (-profile).
@BAQMode(QualityMode = BAQ.QualityMode.DONT_MODIFY, ApplicationTime = BAQ.ApplicationTime.HANDLED_IN_WALKER)
@Reference(window=@Window(start=-5,stop=5))
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
public class ValidateBAQWalker extends ReadWalker<Integer, Integer> {
    @Output(doc="File to which results should be written",required=true)
    protected PrintStream out;

    @Argument(doc="maximum read length to apply the BAQ calculation too",required=false)
    protected int maxReadLen = 1000;

    @Argument(doc="",required=false)
    protected int bw = 7;

    @Argument(doc="",required=false)
    protected boolean samtoolsMode = false;

    @Argument(doc="only operates on reads with this name",required=false)
    protected String readName = null;

    @Argument(doc="If true, all differences are errors", required=false)
    protected boolean strict = false;

    @Argument(doc="prints info for each read", required=false)
    protected boolean printEachRead = false;

    @Argument(doc="Also prints out detailed comparison information when for known calculation differences", required=false)
    protected boolean alsoPrintWarnings = false;

    @Argument(doc="Include reads without BAQ tag", required=false)
    protected boolean includeReadsWithoutBAQTag = false;

    @Argument(doc="x each read is processed", required=false)
    protected int magnification = 1;

    @Argument(doc="Profile performance", required=false)
    protected boolean profile = false;

    // number of reads inspected so far (used to throttle progress printing)
    int counter = 0;

    BAQ baqHMM = null; // matches current samtools parameters

    /** Builds the BAQ HMM: samtools-equivalent parameters when -samtoolsMode, defaults otherwise. */
    public void initialize() {
        if ( samtoolsMode )
            baqHMM = new BAQ(1e-3, 0.1, bw, (byte)0, true);
        else
            baqHMM = new BAQ();
    }

    // reads whose tag/HMM qualities agreed vs. disagreed (only counted for tagged reads)
    long goodReads = 0, badReads = 0;

    /**
     * Selects reads to check (optional name filter, length cap, BAQ-tag presence unless
     * -includeReadsWithoutBAQTag) and either profiles or validates the BAQ calculation.
     *
     * @return 1 if the read was processed, 0 if it was filtered out or excluded from BAQ
     */
    public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
        if ( (readName == null || readName.equals(read.getReadName())) && read.getReadLength() <= maxReadLen && (includeReadsWithoutBAQTag || BAQ.hasBAQTag(read) ) ) {
            if ( baqHMM.excludeReadFromBAQ(read) )
                return 0;
            if ( profile ) {
                profileBAQ(ref, read);
            } else {
                validateBAQ(ref, read);
            }
            return 1;
        }
        return 0;
    }

    // cumulative timers for the three profiled code paths, reported in onTraversalDone
    SimpleTimer tagTimer = new SimpleTimer("from.tag");
    SimpleTimer baqReadTimer = new SimpleTimer("baq.read");
    SimpleTimer glocalTimer = new SimpleTimer("hmm.glocal");

    /**
     * Times each BAQ code path on this read, repeating `magnification` times to make
     * the per-read cost measurable.
     */
    private void profileBAQ(ReferenceContext ref, SAMRecord read) {
        IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference();
        BAQ.BAQCalculationResult baq = null; // NOTE(review): unused local
        tagTimer.restart();
        for ( int i = 0; i < magnification; i++ ) { BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag); }
        tagTimer.stop();
        baqReadTimer.restart();
        for ( int i = 0; i < magnification; i++ ) { baqHMM.baqRead(read, refReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.DONT_MODIFY ); }
        baqReadTimer.stop();
        glocalTimer.restart();
        // NOTE(review): this times the same baqRead call as baqReadTimer above; presumably
        // it was meant to time hmm.glocal directly — confirm before trusting this number
        for ( int i = 0; i < magnification; i++ )
            baqHMM.baqRead(read, refReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.DONT_MODIFY);
        glocalTimer.stop();
    }

    /**
     * Compares the read's tag-derived BAQ qualities against a fresh HMM calculation.
     * Differences fall into known benign categories (cigar/read length mismatch, Q0
     * bases, soft clips, deletions) which only fail in -strict mode; any other
     * difference is a hard failure. Detailed diagnostics are printed for failures,
     * for every read with -printEachRead, or for warnings with -alsoPrintWarnings.
     */
    private void validateBAQ(ReferenceContext ref, SAMRecord read) {
        IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference();
        byte[] baqFromTag = BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag);
        if (counter++ % 1000 == 0 || printEachRead) out.printf("Checking read %s (%d)%n", read.getReadName(), counter);
        BAQ.BAQCalculationResult baq = baqHMM.calcBAQFromHMM(read, refReader);
        boolean fail = false;   // throw at the end?
        boolean print = false;  // print diagnostics (with -alsoPrintWarnings)?
        int badi = 0;           // index of the first differing quality, used in the failure message
        if ( BAQ.hasBAQTag(read) ) {
            // scan for the first position where tag and HMM qualities disagree, then classify it
            for ( badi = 0; badi < baqFromTag.length; badi++ ) {
                if ( baqFromTag[badi] != baq.bq[badi] ) {
                    if ( cigarLength(read) != read.getReadLength() ) {
                        print = true;
                        fail = false;
                        out.printf(" different, but cigar length != read length%n");
                        break;
                    }
                    if (MathUtils.arrayMin(read.getBaseQualities()) == 0) {
                        print = true;
                        fail = strict;
                        out.printf(" different, but Q0 base detected%n");
                        break;
                    }
                    else if (readHasSoftClip(read) && ! samtoolsMode) {
                        print = true;
                        fail = strict;
                        out.printf(" different, but soft clip detected%n");
                        break;
                    } else if (readHasDeletion(read) ) { // && ! samtoolsMode) {
                        print = true;
                        fail = strict;
                        out.printf(" different, but deletion detected%n");
                        break;
                    } else if ( baq.bq[badi] < baqHMM.getMinBaseQual() ) {
                        print = fail = true;
                        out.printf(" Base quality %d < min %d", baq.bq[badi], baqHMM.getMinBaseQual());
                        break;
                    } else {
                        // unexplained difference: always a failure
                        print = fail = true;
                        break;
                    }
                }
            }
            if ( fail || print )
                badReads++;
            else
                goodReads++;
        }
        if ( fail || printEachRead || ( print && alsoPrintWarnings ) ) {
            byte[] pos = new byte[baq.bq.length];
            for ( int i = 0; i < pos.length; i++ ) pos[i] = (byte)i;
            out.printf(" read length : %d%n", read.getReadLength());
            out.printf(" read start : %d (%d unclipped)%n", read.getAlignmentStart(), read.getUnclippedStart());
            out.printf(" cigar : %s%n", read.getCigarString());
            out.printf(" ref bases : %s%n", new String(baq.refBases));
            out.printf(" read bases : %s%n", new String(read.getReadBases()));
            out.printf(" ref length : %d%n", baq.refBases.length);
            out.printf(" BQ tag : %s%n", read.getStringAttribute(BAQ.BAQ_TAG));
            if ( BAQ.hasBAQTag(read) ) printQuals(" BQ deltas : ", getBAQDeltas(read), true);
            printQuals(" original quals: ", read.getBaseQualities(), true);
            printQuals(" baq quals: ", baq.bq, true);
            printQuals(" positions : ", pos, true);
            printQuals(" original quals: ", read.getBaseQualities());
            if ( BAQ.hasBAQTag(read) ) printQuals(" tag quals: ", baqFromTag);
            printQuals(" hmm quals: ", baq.bq);
            out.printf(" read bases : %s%n", new String(read.getReadBases()));
            out.println(Utils.dupString('-', 80));
        }
        if ( fail )
            throw new StingException(String.format("BAQ from read and from HMM differ in read %s at position %d: tag qual = %d, hmm qual = %d",
                    read.getReadName(), badi, baqFromTag[badi], baq.bq[badi]));
    }

    /** True if the read's cigar contains any soft-clip element. */
    private final static boolean readHasSoftClip(SAMRecord read) {
        for (CigarElement e : read.getCigar().getCigarElements()) {
            if ( e.getOperator() == CigarOperator.SOFT_CLIP )
                return true;
        }
        return false;
    }

    /** True if the read's cigar contains any deletion element. */
    private final static boolean readHasDeletion(SAMRecord read) {
        for (CigarElement e : read.getCigar().getCigarElements()) {
            if ( e.getOperator() == CigarOperator.DELETION )
                return true;
        }
        return false;
    }

    /** Prints quals to this walker's output stream as qual+33 characters. */
    public final void printQuals( String prefix, byte[] quals ) {
        printQuals(prefix, quals, false);
    }

    /** Prints quals to this walker's output stream; see the static overload for the format. */
    public final void printQuals( String prefix, byte[] quals, boolean asInt ) {
        printQuals(out, prefix, quals, asInt);
    }

    /**
     * Prints a quality array prefixed by a label: comma-separated integers when
     * asInt is true, otherwise as FASTQ-style (qual+33) characters.
     */
    public final static void printQuals( PrintStream out, String prefix, byte[] quals, boolean asInt ) {
        out.print(prefix);
        for ( int i = 0; i < quals.length; i++) {
            if ( asInt ) {
                out.printf("%2d", (int)quals[i]);
                if ( i+1 != quals.length ) out.print(",");
            } else
                out.print((char)(quals[i]+33));
        }
        out.println();
    }

    /**
     * Get the BAQ delta bytes from the tag in read. Returns null if no BAQ tag is present.
     * @param read the read whose BQ tag should be decoded
     * @return per-base deltas (64 - tag value), or null when the read has no BAQ tag
     */
    public static byte[] getBAQDeltas(SAMRecord read) {
        byte[] baq = BAQ.getBAQTag(read);
        if ( baq != null ) {
            byte[] deltas = new byte[baq.length];
            for ( int i = 0; i < deltas.length; i++)
                deltas[i] = (byte)(-1 * (baq[i] - 64));
            return deltas;
        } else
            return null;
    }

    /**
     * Sums the read-consuming cigar lengths (S/I/M); pads and hard clips are ignored,
     * deletions consume no read bases, and N elements make the read unhandleable (0).
     */
    private int cigarLength(SAMRecord read) {
        int readI = 0;
        for ( CigarElement elt : read.getCigar().getCigarElements() ) {
            int l = elt.getLength();
            switch (elt.getOperator()) {
                case N: // cannot handle these
                    return 0;
                case H : case P : // ignore pads and hard clips
                    break;
                case S :
                case I :
                    readI += l;
                    break;
                case D : break;
                case M :
                    readI += l;
                    break;
                default:
                    throw new ReviewedStingException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getReadName());
            }
        }
        return readI;
    }

    public Integer reduceInit() { return 0; }

    public Integer reduce(Integer value, Integer sum) {
        return value + sum;
    }

    /**
     * Prints either the profiling timers or the concordance summary.
     * NOTE(review): the percentage math divides by nreads — with zero processed reads
     * this prints NaN; confirm whether that case matters.
     */
    public void onTraversalDone(Integer nreads) {
        if ( profile ) {
            out.printf("n.reads baq.per.read calculation time.in.secs%n");
            printTimer(nreads, tagTimer);
            printTimer(nreads, glocalTimer);
            printTimer(nreads, baqReadTimer);
        } else {
            out.printf("total reads BAQ'd %d; concordant BAQ reads %d %.4f; discordant BAQ reads %d %.4f%n", nreads,
                    goodReads, (100.0 * goodReads) / nreads,
                    badReads, (100.0 * badReads) / nreads);
        }
    }

    /** Emits one timer row: read count, magnification, timer name, elapsed seconds. */
    private final void printTimer(int nreads, SimpleTimer timer) {
        out.printf("%d %d %s %.2f%n", nreads, magnification, timer.getName(), timer.getElapsedTime());
    }
}

View File

@ -12,19 +12,16 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.BeforeMethod;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.qc.ValidateBAQWalker;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.Utils;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.io.PrintStream;
import java.util.List;
import java.util.ArrayList;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.samtools.*;
/**
@ -188,8 +185,8 @@ public class BAQUnitTest extends BaseTest {
System.out.println(Utils.dupString('-', 40));
System.out.println("reads : " + new String(test.readBases));
ValidateBAQWalker.printQuals(System.out, "in-quals:", test.quals, false);
ValidateBAQWalker.printQuals(System.out, "bq-quals:", result.bq, false);
printQuals(System.out, "in-quals:", test.quals, false);
printQuals(System.out, "bq-quals:", result.bq, false);
for (int i = 0; i < test.quals.length; i++) {
//result.bq[i] = baqHMM.capBaseByBAQ(result.rawQuals[i], result.bq[i], result.state[i], i);
Assert.assertTrue(result.bq[i] >= baqHMM.getMinBaseQual() || test.expected[i] < baqHMM.getMinBaseQual(), "BQ < min base quality");
@ -197,4 +194,16 @@ public class BAQUnitTest extends BaseTest {
}
}
/**
 * Prints a quality array to out, prefixed with the given label: comma-separated
 * integers when asInt is true, otherwise as FASTQ-style (qual+33) characters.
 */
public final static void printQuals( PrintStream out, String prefix, byte[] quals, boolean asInt ) {
    out.print(prefix);
    for ( int i = 0; i < quals.length; i++) {
        if ( asInt ) {
            out.printf("%2d", (int)quals[i]);
            if ( i+1 != quals.length ) out.print(",");
        } else
            out.print((char)(quals[i]+33));
    }
    out.println();
}
}