Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2012-02-13 15:10:10 -05:00
commit ad90af94ed
13 changed files with 212 additions and 69 deletions

View File

@ -185,12 +185,12 @@ public class GenomeAnalysisEngine {
public static void resetRandomGenerator(long seed) { randomGenerator.setSeed(seed); } public static void resetRandomGenerator(long seed) { randomGenerator.setSeed(seed); }
/** /**
* Static base quality score recalibration helper object * Base Quality Score Recalibration helper object
*/ */
private static BaseRecalibration baseRecalibration = null; private BaseRecalibration baseRecalibration = null;
public static BaseRecalibration getBaseRecalibration() { return baseRecalibration; } public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
public static boolean hasBaseRecalibration() { return baseRecalibration != null; } public boolean hasBaseRecalibration() { return baseRecalibration != null; }
public static void setBaseRecalibration(File recalFile) { baseRecalibration = new BaseRecalibration(recalFile); } public void setBaseRecalibration(File recalFile) { baseRecalibration = new BaseRecalibration(recalFile); }
/** /**
* Actually run the GATK with the specified walker. * Actually run the GATK with the specified walker.
@ -770,6 +770,7 @@ public class GenomeAnalysisEngine {
getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF, getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF,
getWalkerBAQQualityMode(), getWalkerBAQQualityMode(),
refReader, refReader,
getBaseRecalibration(),
argCollection.defaultBaseQualities); argCollection.defaultBaseQualities);
} }

View File

@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
import java.util.Collection; import java.util.Collection;
/** /**
@ -27,23 +28,20 @@ import java.util.Collection;
* information about how they should be downsampled, sorted, and filtered. * information about how they should be downsampled, sorted, and filtered.
*/ */
public class ReadProperties { public class ReadProperties {
private Collection<SAMReaderID> readers = null; private final Collection<SAMReaderID> readers;
private SAMFileHeader header = null; private final SAMFileHeader header;
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT; private final SAMFileReader.ValidationStringency validationStringency;
private DownsamplingMethod downsamplingMethod = null; private final DownsamplingMethod downsamplingMethod;
private ValidationExclusion exclusionList = null; private final ValidationExclusion exclusionList;
private Collection<ReadFilter> supplementalFilters = null; private final Collection<ReadFilter> supplementalFilters;
private boolean includeReadsWithDeletionAtLoci = false; private final boolean includeReadsWithDeletionAtLoci;
private boolean useOriginalBaseQualities = false; private final boolean useOriginalBaseQualities;
private boolean generateExtendedEvents = false; private final boolean generateExtendedEvents;
private BAQ.CalculationMode cmode = BAQ.CalculationMode.OFF; private final BAQ.CalculationMode cmode;
private BAQ.QualityMode qmode = BAQ.QualityMode.DONT_MODIFY; private final BAQ.QualityMode qmode;
IndexedFastaSequenceFile refReader = null; // read for BAQ, if desired private final IndexedFastaSequenceFile refReader; // read for BAQ, if desired
private byte defaultBaseQualities; private final BaseRecalibration bqsrApplier;
private final byte defaultBaseQualities;
// do we want to generate additional piles of "extended" events (indels)
// immediately after the reference base such event is associated with?
/** /**
* Return true if the walker wants to see reads that contain deletions when looking at locus pileups * Return true if the walker wants to see reads that contain deletions when looking at locus pileups
@ -126,6 +124,8 @@ public class ReadProperties {
return refReader; return refReader;
} }
public BaseRecalibration getBQSRApplier() { return bqsrApplier; }
/** /**
* @return Default base quality value to fill reads missing base quality information. * @return Default base quality value to fill reads missing base quality information.
*/ */
@ -167,6 +167,7 @@ public class ReadProperties {
BAQ.CalculationMode cmode, BAQ.CalculationMode cmode,
BAQ.QualityMode qmode, BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader, IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
byte defaultBaseQualities) { byte defaultBaseQualities) {
this.readers = samFiles; this.readers = samFiles;
this.header = header; this.header = header;
@ -180,6 +181,7 @@ public class ReadProperties {
this.cmode = cmode; this.cmode = cmode;
this.qmode = qmode; this.qmode = qmode;
this.refReader = refReader; this.refReader = refReader;
this.bqsrApplier = bqsrApplier;
this.defaultBaseQualities = defaultBaseQualities; this.defaultBaseQualities = defaultBaseQualities;
} }
} }

View File

@ -46,6 +46,8 @@ import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.baq.BAQSamIterator; import org.broadinstitute.sting.utils.baq.BAQSamIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.recalibration.BQSRSamIterator;
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import java.io.File; import java.io.File;
@ -201,6 +203,7 @@ public class SAMDataSource {
BAQ.CalculationMode.OFF, BAQ.CalculationMode.OFF,
BAQ.QualityMode.DONT_MODIFY, BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ null, // no BAQ
null, // no BQSR
(byte) -1); (byte) -1);
} }
@ -237,6 +240,7 @@ public class SAMDataSource {
BAQ.CalculationMode cmode, BAQ.CalculationMode cmode,
BAQ.QualityMode qmode, BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader, IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
byte defaultBaseQualities) { byte defaultBaseQualities) {
this.readMetrics = new ReadMetrics(); this.readMetrics = new ReadMetrics();
this.genomeLocParser = genomeLocParser; this.genomeLocParser = genomeLocParser;
@ -309,6 +313,7 @@ public class SAMDataSource {
cmode, cmode,
qmode, qmode,
refReader, refReader,
bqsrApplier,
defaultBaseQualities); defaultBaseQualities);
// cache the read group id (original) -> read group id (merged) // cache the read group id (original) -> read group id (merged)
@ -591,6 +596,7 @@ public class SAMDataSource {
readProperties.getBAQCalculationMode(), readProperties.getBAQCalculationMode(),
readProperties.getBAQQualityMode(), readProperties.getBAQQualityMode(),
readProperties.getRefReader(), readProperties.getRefReader(),
readProperties.getBQSRApplier(),
readProperties.defaultBaseQualities()); readProperties.defaultBaseQualities());
} }
@ -660,9 +666,10 @@ public class SAMDataSource {
BAQ.CalculationMode cmode, BAQ.CalculationMode cmode,
BAQ.QualityMode qmode, BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader, IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
byte defaultBaseQualities) { byte defaultBaseQualities) {
if ( useOriginalBaseQualities || defaultBaseQualities >= 0 ) if (useOriginalBaseQualities || defaultBaseQualities >= 0)
// only wrap if we are replacing the original qualitiies or using a default base quality // only wrap if we are replacing the original qualities or using a default base quality
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
// NOTE: this (and other filtering) should be done before on-the-fly sorting // NOTE: this (and other filtering) should be done before on-the-fly sorting
@ -675,6 +682,9 @@ public class SAMDataSource {
if (!noValidationOfReadOrder && enableVerification) if (!noValidationOfReadOrder && enableVerification)
wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator); wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
if (bqsrApplier != null)
wrappedIterator = new BQSRSamIterator(wrappedIterator, bqsrApplier);
if (cmode != BAQ.CalculationMode.OFF) if (cmode != BAQ.CalculationMode.OFF)
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode); wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);

View File

@ -68,7 +68,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
if(prevLoc != null) { if(prevLoc != null) {
for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) { for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii); final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
if( initialIntervals.overlaps( fakeLoc ) ) { if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null ) final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null )
: ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) ); : ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
isActiveList.add( isActiveProb ); isActiveList.add( isActiveProb );
@ -89,7 +89,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
// Call the walkers isActive function for this locus and add them to the list to be integrated later // Call the walkers isActive function for this locus and add them to the list to be integrated later
if( initialIntervals.overlaps( location ) ) { if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus )
: ( walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0 ) ); : ( walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0 ) );
isActiveList.add( isActiveProb ); isActiveList.add( isActiveProb );

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr; package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -69,17 +70,32 @@ public class ContextCovariate implements StandardCovariate {
String[] insertions = new String [l]; String[] insertions = new String [l];
String[] deletions = new String [l]; String[] deletions = new String [l];
final boolean negativeStrand = read.getReadNegativeStrandFlag();
byte[] bases = read.getReadBases(); byte[] bases = read.getReadBases();
if (negativeStrand) {
bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place
}
for (int i = 0; i < read.getReadLength(); i++) { for (int i = 0; i < read.getReadLength(); i++) {
mismatches[i] = contextWith(bases, i, mismatchesContextSize, mismatchesNoContext); mismatches[i] = contextWith(bases, i, mismatchesContextSize, mismatchesNoContext);
insertions[i] = contextWith(bases, i, insertionsContextSize, insertionsNoContext); insertions[i] = contextWith(bases, i, insertionsContextSize, insertionsNoContext);
deletions[i] = contextWith(bases, i, deletionsContextSize, deletionsNoContext); deletions[i] = contextWith(bases, i, deletionsContextSize, deletionsNoContext);
} }
if (negativeStrand) {
reverse(mismatches);
reverse(insertions);
reverse(deletions);
}
return new CovariateValues(mismatches, insertions, deletions); return new CovariateValues(mismatches, insertions, deletions);
} }
// Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override
public final Comparable getValue(final String str) {
return str;
}
/** /**
* calculates the context of a base indenpendent of the covariate mode * calculates the context of a base independent of the covariate mode
* *
* @param bases the bases in the read to build the context from * @param bases the bases in the read to build the context from
* @param offset the position in the read to calculate the context for * @param offset the position in the read to calculate the context for
@ -98,9 +114,17 @@ public class ContextCovariate implements StandardCovariate {
return s; return s;
} }
// Used to get the covariate's value from input csv file during on-the-fly recalibration /**
@Override * Reverses the given array in place.
public final Comparable getValue(final String str) { *
return str; * @param array any array
*/
private static void reverse(final Comparable[] array) {
final int arrayLength = array.length;
for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
final Comparable temp = array[l];
array[l] = array[r];
array[r] = temp;
}
} }
} }

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr; package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/** /**
* The object temporarily held by a read that describes all of it's covariates. * The object temporarily held by a read that describes all of it's covariates.
* *
@ -15,9 +17,9 @@ public class CovariateKeySet {
private int nextCovariateIndex; private int nextCovariateIndex;
public final static String mismatchesCovariateName = "M"; private static String mismatchesCovariateName = "M";
public final static String insertionsCovariateName = "I"; private static String insertionsCovariateName = "I";
public final static String deletionsCovariateName = "D"; private static String deletionsCovariateName = "D";
public CovariateKeySet(int readLength, int numberOfCovariates) { public CovariateKeySet(int readLength, int numberOfCovariates) {
numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format) numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format)
@ -37,6 +39,29 @@ public class CovariateKeySet {
nextCovariateIndex++; nextCovariateIndex++;
} }
public static RecalDataManager.BaseRecalibrationType getErrorModelFromString(final String modelString) {
if (modelString.equals(mismatchesCovariateName))
return RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION;
else if (modelString.equals(insertionsCovariateName))
return RecalDataManager.BaseRecalibrationType.BASE_INSERTION;
else if (modelString.equals(deletionsCovariateName))
return RecalDataManager.BaseRecalibrationType.BASE_DELETION;
throw new ReviewedStingException("Unrecognized Base Recalibration model string: " + modelString);
}
public Object[] getKeySet(final int readPosition, final RecalDataManager.BaseRecalibrationType errorModel) {
switch (errorModel) {
case BASE_SUBSTITUTION:
return getMismatchesKeySet(readPosition);
case BASE_INSERTION:
return getInsertionsKeySet(readPosition);
case BASE_DELETION:
return getDeletionsKeySet(readPosition);
default:
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
}
}
public Object[] getMismatchesKeySet(int readPosition) { public Object[] getMismatchesKeySet(int readPosition) {
return mismatchesKeySet[readPosition]; return mismatchesKeySet[readPosition];
} }

View File

@ -34,7 +34,7 @@ public class BAQSamIterator implements StingSAMIterator {
"cmode != null" , "cmode != null" ,
"qmode != null"}) "qmode != null"})
public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) { public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) {
if ( cmode == BAQ.CalculationMode.OFF) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode OFF"); if ( cmode == BAQ.CalculationMode.OFF ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode OFF");
if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY"); if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY");
this.refReader = refReader; this.refReader = refReader;

View File

@ -4,6 +4,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement; import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -150,13 +151,23 @@ public class FragmentUtils {
final int numBases = firstReadStop + secondRead.getReadLength(); final int numBases = firstReadStop + secondRead.getReadLength();
final byte[] bases = new byte[numBases]; final byte[] bases = new byte[numBases];
final byte[] quals = new byte[numBases]; final byte[] quals = new byte[numBases];
// BUGBUG: too verbose, clean this up.
final byte[] insertionQuals = new byte[numBases];
final byte[] deletionQuals = new byte[numBases];
final byte[] firstReadBases = firstRead.getReadBases(); final byte[] firstReadBases = firstRead.getReadBases();
final byte[] firstReadQuals = firstRead.getBaseQualities(); final byte[] firstReadQuals = firstRead.getBaseQualities();
final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities();
final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities();
final byte[] secondReadBases = secondRead.getReadBases(); final byte[] secondReadBases = secondRead.getReadBases();
final byte[] secondReadQuals = secondRead.getBaseQualities(); final byte[] secondReadQuals = secondRead.getBaseQualities();
final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities();
final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities();
for(int iii = 0; iii < firstReadStop; iii++) { for(int iii = 0; iii < firstReadStop; iii++) {
bases[iii] = firstReadBases[iii]; bases[iii] = firstReadBases[iii];
quals[iii] = firstReadQuals[iii]; quals[iii] = firstReadQuals[iii];
insertionQuals[iii] = firstReadInsertionQuals[iii];
deletionQuals[iii] = firstReadDeletionQuals[iii];
} }
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) { for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) { if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
@ -164,16 +175,22 @@ public class FragmentUtils {
} }
bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] ); bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] );
quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] ); quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] );
insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
} }
for(int iii = firstRead.getReadLength(); iii < numBases; iii++) { for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
bases[iii] = secondReadBases[iii-firstReadStop]; bases[iii] = secondReadBases[iii-firstReadStop];
quals[iii] = secondReadQuals[iii-firstReadStop]; quals[iii] = secondReadQuals[iii-firstReadStop];
insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop];
deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop];
} }
final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader()); final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader());
returnRead.setAlignmentStart(firstRead.getUnclippedStart()); returnRead.setAlignmentStart(firstRead.getUnclippedStart());
returnRead.setReadBases( bases ); returnRead.setReadBases( bases );
returnRead.setBaseQualities( quals ); returnRead.setBaseQualities( quals, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION );
returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION );
returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION );
returnRead.setReadGroup( firstRead.getReadGroup() ); returnRead.setReadGroup( firstRead.getReadGroup() );
returnRead.setReferenceName( firstRead.getReferenceName() ); returnRead.setReferenceName( firstRead.getReferenceName() );
final CigarElement c = new CigarElement(bases.length, CigarOperator.M); final CigarElement c = new CigarElement(bases.length, CigarOperator.M);

View File

@ -0,0 +1,50 @@
package org.broadinstitute.sting.utils.recalibration;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 2/13/12
*/
public class BQSRSamIterator implements StingSAMIterator {
private final StingSAMIterator it;
private final BaseRecalibration bqsr;
/**
* Creates a new BQSRSamIterator and applies BQSR on the fly to incoming reads.
*
* @param it The incoming SamIterator to wrap
* @param bqsr The object which holds the BQSR table information and knows how to apply it
*/
@Requires({
"it != null",
"bqsr != null"})
public BQSRSamIterator(StingSAMIterator it, BaseRecalibration bqsr) {
if ( bqsr == null ) throw new ReviewedStingException("BUG: shouldn't create BQSRSamIterator with null recalibration object");
this.it = it;
this.bqsr = bqsr;
}
@Requires("hasNext()")
@Ensures("result != null")
public SAMRecord next() {
SAMRecord read = it.next();
bqsr.recalibrateRead((GATKSAMRecord) read);
return read;
}
public boolean hasNext() { return this.it.hasNext(); }
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
public void close() { it.close(); }
public Iterator<SAMRecord> iterator() { return this; }
}

View File

@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -165,9 +166,7 @@ public class BaseRecalibration {
key[iii] = cov.getValue( vals[iii] ); key[iii] = cov.getValue( vals[iii] );
} }
final String modelString = vals[iii++]; final String modelString = vals[iii++];
final RecalDataManager.BaseRecalibrationType errorModel = ( modelString.equals(CovariateKeySet.mismatchesCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION : final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.getErrorModelFromString(modelString);
( modelString.equals(CovariateKeySet.insertionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_INSERTION :
( modelString.equals(CovariateKeySet.deletionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_DELETION : null ) ) );
// Create a new datum using the number of observations, number of mismatches, and reported quality score // Create a new datum using the number of observations, number of mismatches, and reported quality score
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 ); final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
@ -183,18 +182,15 @@ public class BaseRecalibration {
final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read ); final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read );
for( final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values() ) { for( final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values() ) {
final byte[] originalQuals = ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? read.getBaseQualities() : final byte[] originalQuals = read.getBaseQualities( errorModel );
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? read.getBaseDeletionQualities() :
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? read.getBaseDeletionQualities() : null ) ) );
final byte[] recalQuals = originalQuals.clone(); final byte[] recalQuals = originalQuals.clone();
// For each base in the read // For each base in the read
for( int offset = 0; offset < read.getReadLength(); offset++ ) { for( int offset = 0; offset < read.getReadLength(); offset++ ) {
final Object[] fullCovariateKey = final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel);
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? covariateKeySet.getMismatchesKeySet(offset) :
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? covariateKeySet.getInsertionsKeySet(offset) : final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates
( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? covariateKeySet.getDeletionsKeySet(offset) : null ) ) );
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey); Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
if( qualityScore == null ) { if( qualityScore == null ) {
@ -206,23 +202,10 @@ public class BaseRecalibration {
} }
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
switch (errorModel) { read.setBaseQualities( recalQuals, errorModel );
case BASE_SUBSTITUTION:
read.setBaseQualities( recalQuals );
break;
case BASE_INSERTION:
read.setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, recalQuals );
break;
case BASE_DELETION:
read.setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, recalQuals );
break;
default:
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
} }
} }
}
/** /**
* Implements a serial recalibration of the reads using the combinational table. * Implements a serial recalibration of the reads using the combinational table.
* First, we perform a positional recalibration, and then a subsequent dinuc correction. * First, we perform a positional recalibration, and then a subsequent dinuc correction.

View File

@ -25,9 +25,9 @@
package org.broadinstitute.sting.utils.sam; package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*; import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager;
import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
@ -163,8 +163,37 @@ public class GATKSAMRecord extends BAMRecord {
} }
/** /**
* Accessors for base insertion and base deletion quality scores * Setters and Accessors for base insertion and base deletion quality scores
*/ */
public void setBaseQualities( final byte[] quals, final RecalDataManager.BaseRecalibrationType errorModel ) {
switch( errorModel ) {
case BASE_SUBSTITUTION:
setBaseQualities(quals);
break;
case BASE_INSERTION:
setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, quals );
break;
case BASE_DELETION:
setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, quals );
break;
default:
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
}
}
public byte[] getBaseQualities( final RecalDataManager.BaseRecalibrationType errorModel ) {
switch( errorModel ) {
case BASE_SUBSTITUTION:
return getBaseQualities();
case BASE_INSERTION:
return getBaseInsertionQualities();
case BASE_DELETION:
return getBaseDeletionQualities();
default:
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
}
}
public byte[] getBaseInsertionQualities() { public byte[] getBaseInsertionQualities() {
byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES ); byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES );
if( quals == null ) { if( quals == null ) {

View File

@ -79,7 +79,8 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
false, false,
BAQ.CalculationMode.OFF, BAQ.CalculationMode.OFF,
BAQ.QualityMode.DONT_MODIFY, BAQ.QualityMode.DONT_MODIFY,
null, null, // no BAQ
null, // no BQSR
(byte)0); (byte)0);
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary()); GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());

View File

@ -308,6 +308,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
BAQ.CalculationMode.OFF, BAQ.CalculationMode.OFF,
BAQ.QualityMode.DONT_MODIFY, BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ null, // no BAQ
null, // no BQSR
(byte) -1 (byte) -1
); );
} }