Resolve merge conflicts
commit 31c394d588
@@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@@ -91,7 +90,7 @@ import java.util.TreeSet;
*/
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE})
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
public class PrintReadsWalker extends ReadWalker<GATKSAMRecord, SAMFileWriter> {
@Output(doc="Write output to this BAM filename instead of STDOUT")
SAMFileWriter out;
@@ -129,6 +128,13 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
public Set<String> sampleNames = new TreeSet<String>();
/**
* Erase all extra attributes in the read but keep the read group information
*/
@Argument(fullName="simplify", shortName="s", doc="Simplify all reads.", required=false)
public boolean simplifyReads = false;
private TreeSet<String> samplesToChoose = new TreeSet<String>();
private boolean SAMPLES_SPECIFIED = false;
@@ -162,7 +168,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* The reads filter function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @param read the read itself, as a GATKSAMRecord
* @return true if the read passes the filter, false if it doesn't
*/
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
@@ -208,11 +214,11 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @param read the read itself, as a GATKSAMRecord
* @return the read itself
*/
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read;
public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return simplifyReads ? read.simplify() : read;
}
/**
@@ -232,7 +238,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* @param output the output source
* @return the SAMFileWriter, so that the next reduce can emit to the same source
*/
public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
public SAMFileWriter reduce( GATKSAMRecord read, SAMFileWriter output ) {
output.addAlignment(read);
return output;
}
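The hunks above move PrintReadsWalker from SAMRecord to GATKSAMRecord and add a --simplify argument whose map() step strips per-read annotations while keeping the read group. A minimal standalone sketch of that map step, using a hypothetical SimpleRead stand-in because GATKSAMRecord.simplify() itself is not shown in this diff:

import java.util.HashMap;
import java.util.Map;

// Hypothetical stand-in for GATKSAMRecord; only here to illustrate the simplify step.
class SimpleRead {
    final String name;
    final String readGroup;                                                 // kept by simplify()
    final Map<String, String> optionalTags = new HashMap<String, String>(); // dropped by simplify()

    SimpleRead(String name, String readGroup) {
        this.name = name;
        this.readGroup = readGroup;
    }

    // Assumption: simplify() keeps the read group and erases all other optional attributes.
    SimpleRead simplify() {
        return new SimpleRead(name, readGroup);
    }
}

public class SimplifyMapSketch {
    static boolean simplifyReads = true; // mirrors the new --simplify argument

    // Mirrors the new map() body: return simplifyReads ? read.simplify() : read;
    static SimpleRead map(SimpleRead read) {
        return simplifyReads ? read.simplify() : read;
    }

    public static void main(String[] args) {
        SimpleRead read = new SimpleRead("read1", "RG1");
        read.optionalTags.put("OQ", "IIII");
        SimpleRead out = map(read);
        System.out.println(out.readGroup + " tags=" + out.optionalTags.size()); // prints: RG1 tags=0
    }
}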
@@ -30,7 +30,7 @@ public class BaseQualityRankSumTest extends RankSumTest {
}
}
}
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
// TODO -- implement me; how do we pull out the correct offset from the read?
return;
@@ -34,7 +34,7 @@ public class MappingQualityRankSumTest extends RankSumTest {
}
}
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
final boolean matchesRef = ref.equals(alleleBin.getKey());
final boolean matchesAlt = alts.contains(alleleBin.getKey());
@@ -123,7 +123,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
if ( context == null )
continue;
fillQualsFromPileup(vc.getReference(), vc.getAlternateAlleles(), context, refQuals, altQuals);
fillQualsFromPileup(vc.getReference(), vc.getAlternateAlleles(), vc.getStart(), context, refQuals, altQuals);
}
if ( refQuals.size() == 0 || altQuals.size() == 0 )
@@ -146,7 +146,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
return map;
}
protected abstract void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals);
protected abstract void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals);
protected abstract void fillQualsFromPileup(final byte ref, final List<Byte> alts, final ReadBackedPileup pileup, final List<Double> refQuals, final List<Double> altQuals);
@@ -12,6 +12,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.*;
@@ -47,11 +48,7 @@ public class ReadPosRankSumTest extends RankSumTest {
}
}
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
// TODO -- implement me; how do we pull out the correct offset from the read?
return;
/*
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
final boolean matchesRef = ref.equals(alleleBin.getKey());
final boolean matchesAlt = alts.contains(alleleBin.getKey());
@@ -59,13 +56,21 @@ public class ReadPosRankSumTest extends RankSumTest {
continue;
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getUnclippedStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true );
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED )
continue;
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, false, 0, 0 );
final int numAlignedBases = AlignmentUtils.getNumAlignedBases( read );
if (readPos > numAlignedBases / 2)
readPos = numAlignedBases - (readPos + 1);
if ( matchesRef )
refQuals.add((double)read.getMappingQuality());
refQuals.add((double) readPos);
else
altQuals.add((double)read.getMappingQuality());
altQuals.add((double) readPos);
}
}
*/
}
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
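The ReadPosRankSumTest body above converts the variant's reference coordinate into an offset inside the read and then folds that offset so it measures distance from the nearer read end. A self-contained sketch of that folding step; the CIGAR-aware offset lookup done by ReadUtils/AlignmentUtils is assumed to have already produced a plain offset:

public class ReadPosSketch {
    /**
     * Folds an offset within the aligned portion of a read so that positions in the
     * second half are measured from the end of the read, mirroring:
     *   if (readPos > numAlignedBases / 2) readPos = numAlignedBases - (readPos + 1);
     */
    static int distanceFromNearerEnd(int readPos, int numAlignedBases) {
        if (readPos > numAlignedBases / 2)
            readPos = numAlignedBases - (readPos + 1);
        return readPos;
    }

    public static void main(String[] args) {
        int alignedLength = 100;
        System.out.println(distanceFromNearerEnd(3, alignedLength));  // 3  (near the start)
        System.out.println(distanceFromNearerEnd(97, alignedLength)); // 2  (near the end)
    }
}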
@@ -0,0 +1,52 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.LinkedList;
import java.util.List;
/**
* Short one line description of the walker.
*
* <p> [Long description of the walker] </p>
*
*
* <h2>Input</h2> <p> [Description of the Input] </p>
*
* <h2>Output</h2> <p> [Description of the Output] </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T [walker name]
* </pre>
*
* @author Mauricio Carneiro
* @since 4/17/12
*/
public class AccuracyDatum extends RecalDatum {
private final List<Double> accuracy = new LinkedList<Double>();
private final List<Byte> reportedQualities = new LinkedList<Byte>();
public AccuracyDatum(final RecalDatum recalDatum, final byte originalQuality) {
super(recalDatum);
accuracy.add(calculateAccuracy(recalDatum, originalQuality));
reportedQualities.add(originalQuality);
}
public void combine(final RecalDatum recalDatum, final byte originalQuality) {
this.combine(recalDatum);
accuracy.add(calculateAccuracy(recalDatum, originalQuality));
reportedQualities.add(originalQuality);
}
@Override
public String toString() {
return String.format("%s,%.2f,%.2f", super.toString(), MathUtils.average(reportedQualities), MathUtils.average(accuracy));
}
private static double calculateAccuracy(final RecalDatum recalDatum, final byte originalQuality) {
return recalDatum.getEmpiricalQuality() - originalQuality;
}
}
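The new AccuracyDatum class tracks, per recalibration bin, how far the empirical quality drifts from the originally reported quality. A minimal sketch of that accuracy metric, assuming only that getEmpiricalQuality() returns a Phred-scaled double as shown above:

import java.util.ArrayList;
import java.util.List;

public class AccuracySketch {
    // accuracy = empirical quality - reported quality, as in calculateAccuracy() above
    static double accuracy(double empiricalQuality, byte reportedQuality) {
        return empiricalQuality - reportedQuality;
    }

    public static void main(String[] args) {
        List<Double> accuracies = new ArrayList<Double>();
        accuracies.add(accuracy(27.4, (byte) 30)); // reads reported as Q30 but behaving like Q27.4
        accuracies.add(accuracy(31.0, (byte) 30));
        double mean = 0.0;
        for (double a : accuracies) mean += a / accuracies.size();
        System.out.printf("mean accuracy = %.2f%n", mean); // negative means quality was over-reported
    }
}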
@@ -52,7 +52,7 @@ public class BQSRKeyManager {
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
int nBits = required.numberOfBits(); // number of bits used by this covariate
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, nBits, mask, required)); // Create an object for this required covariate
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
nRequiredBits += nBits;
}
@@ -184,7 +184,7 @@ public class BQSRKeyManager {
* @return an object array with the values for each key
*/
public List<Object> keySetFrom(BitSet key) {
List<Object> objectKeys = new ArrayList<Object>();
List<Object> objectKeys = new LinkedList<Object>();
for (RequiredCovariateInfo info : requiredCovariates) {
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
@@ -286,7 +286,7 @@ public class BQSRKeyManager {
public final BitSet mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
public final Covariate covariate; // this allows reverse lookup of the Covariates in order
RequiredCovariateInfo(int bitsBefore, int nBits, BitSet mask, Covariate covariate) {
RequiredCovariateInfo(int bitsBefore, BitSet mask, Covariate covariate) {
this.bitsBefore = bitsBefore;
this.mask = mask;
this.covariate = covariate;
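In BQSRKeyManager each covariate occupies a fixed slice of the combined BitSet key: the constructor records how many bits precede the covariate (bitsBefore) plus a mask, and keySetFrom pulls each slice back out. A rough standalone sketch of that slicing, using plain java.util.BitSet and assuming each covariate value fits in its declared width (the real genericMask/extractBitSetFromKey helpers are not shown in this diff):

import java.util.BitSet;

public class BitSetKeySketch {
    // Writes nBits of value into key starting at offset (low bit first).
    static void pack(BitSet key, int offset, int nBits, long value) {
        for (int i = 0; i < nBits; i++)
            if (((value >> i) & 1L) != 0)
                key.set(offset + i);
    }

    // Reads nBits back out of key starting at offset.
    static long unpack(BitSet key, int offset, int nBits) {
        long value = 0;
        for (int i = 0; i < nBits; i++)
            if (key.get(offset + i))
                value |= 1L << i;
        return value;
    }

    public static void main(String[] args) {
        BitSet key = new BitSet();
        pack(key, 0, 8, 42); // first covariate: 8 bits, bitsBefore = 0
        pack(key, 8, 6, 17); // second covariate: 6 bits, bitsBefore = 8
        System.out.println(unpack(key, 0, 8) + " " + unpack(key, 8, 6)); // prints: 42 17
    }
}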
@@ -47,9 +47,6 @@ public class ContextCovariate implements StandardCovariate {
private int insertionsContextSize;
private int deletionsContextSize;
private final BitSet NO_CONTEXT_BITSET = BitSetUtils.bitSetFrom(-1L);
// protected final String NO_CONTEXT_VALUE = "N"; // protected so we can UNIT TEST it
private byte LOW_QUAL_TAIL;
// Initialize any member variables using the command-line arguments passed to the walkers
@@ -14,9 +14,9 @@ import java.util.BitSet;
* @since 2/8/12
*/
public class CovariateValues {
private BitSet[] mismatches;
private BitSet[] insertions;
private BitSet[] deletions;
private final BitSet[] mismatches;
private final BitSet[] insertions;
private final BitSet[] deletions;
public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) {
this.mismatches = mismatch;
@@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.List;
/*
* Copyright (c) 2010 The Broad Institute
*
@@ -38,10 +36,13 @@ import java.util.List;
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class RecalDatumOptimized {
public class Datum {
long numObservations; // number of bases seen in total
long numMismatches; // number of bases seen that didn't match the reference
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
protected long numObservations; // number of bases seen in total
protected long numMismatches; // number of bases seen that didn't match the reference
//---------------------------------------------------------------------------------------------------------------
//
@@ -49,67 +50,43 @@ public class RecalDatumOptimized {
//
//---------------------------------------------------------------------------------------------------------------
public RecalDatumOptimized() {
public Datum() {
numObservations = 0L;
numMismatches = 0L;
}
public RecalDatumOptimized(final long _numObservations, final long _numMismatches) {
numObservations = _numObservations;
numMismatches = _numMismatches;
}
public RecalDatumOptimized(final RecalDatumOptimized copy) {
this.numObservations = copy.numObservations;
this.numMismatches = copy.numMismatches;
}
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
public synchronized final void increment(final long incObservations, final long incMismatches) {
synchronized void increment(final long incObservations, final long incMismatches) {
numObservations += incObservations;
numMismatches += incMismatches;
}
public synchronized final void increment(final RecalDatumOptimized other) {
increment(other.numObservations, other.numMismatches);
}
public synchronized final void increment(final List<RecalDatumOptimized> data) {
for (RecalDatumOptimized other : data) {
this.increment(other);
}
}
//---------------------------------------------------------------------------------------------------------------
//
// methods to derive empirical quality score
//
//---------------------------------------------------------------------------------------------------------------
public final double empiricalQualDouble(final int smoothing, final double maxQual) {
final double doubleMismatches = (double) (numMismatches + smoothing);
final double doubleObservations = (double) (numObservations + smoothing);
double empiricalQualDouble() {
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(empiricalQual, maxQual);
return Math.min(empiricalQual, (double) QualityUtils.MAX_QUAL_SCORE);
}
public final byte empiricalQualByte(final int smoothing) {
final double doubleMismatches = (double) (numMismatches + smoothing);
final double doubleObservations = (double) (numObservations + smoothing);
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
}
public final byte empiricalQualByte() {
return empiricalQualByte(0); // 'default' behavior is to use smoothing value of zero
byte empiricalQualByte() {
final double doubleMismatches = (double) (numMismatches);
final double doubleObservations = (double) (numObservations);
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
}
@Override
public final String toString() {
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
}
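Both the old RecalDatumOptimized and the new Datum derive an empirical quality from observation and mismatch counts; the new version bakes in a smoothing constant of 1 and caps the result at QualityUtils.MAX_QUAL_SCORE. A standalone sketch of that formula, with the cap passed in as a parameter because the actual value of MAX_QUAL_SCORE is not shown in this diff:

public class EmpiricalQualSketch {
    private static final int SMOOTHING_CONSTANT = 1; // avoids division by zero, as in Datum

    /**
     * Phred-scaled empirical quality: -10 * log10(mismatches / observations), smoothed and capped.
     * maxQual stands in for QualityUtils.MAX_QUAL_SCORE.
     */
    static double empiricalQual(long numObservations, long numMismatches, double maxQual) {
        final double mismatches = numMismatches + SMOOTHING_CONSTANT;
        final double observations = numObservations + SMOOTHING_CONSTANT;
        final double qual = -10.0 * Math.log10(mismatches / observations);
        return Math.min(qual, maxQual);
    }

    public static void main(String[] args) {
        // 10,000 observed bases with 10 mismatches is roughly Q30
        System.out.printf("%.1f%n", empiricalQual(10000, 10, 93.0));
    }
}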
@@ -7,8 +7,8 @@ public enum EventType {
BASE_INSERTION(1, "I"),
BASE_DELETION(2, "D");
public int index;
public String representation;
public final int index;
private final String representation;
private EventType(int index, String representation) {
this.index = index;
@@ -19,9 +19,9 @@ import java.util.Map;
public class QuantizationInfo {
private List<Byte> quantizedQuals;
private List<Long> empiricalQualCounts;
int quantizationLevels;
private int quantizationLevels;
public QuantizationInfo(List<Byte> quantizedQuals, List<Long> empiricalQualCounts, int quantizationLevels) {
private QuantizationInfo(List<Byte> quantizedQuals, List<Long> empiricalQualCounts, int quantizationLevels) {
this.quantizedQuals = quantizedQuals;
this.empiricalQualCounts = empiricalQualCounts;
this.quantizationLevels = quantizationLevels;
@@ -13,9 +13,9 @@ import java.util.BitSet;
* @since 2/8/12
*/
public class ReadCovariates {
private BitSet[][] mismatchesKeySet;
private BitSet[][] insertionsKeySet;
private BitSet[][] deletionsKeySet;
private final BitSet[][] mismatchesKeySet;
private final BitSet[][] insertionsKeySet;
private final BitSet[][] deletionsKeySet;
private int nextCovariateIndex;
@@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -221,7 +220,7 @@ public class RecalDataManager {
logger.info("");
}
public static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
private static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
int tableIndex = 0;
@@ -349,7 +348,7 @@ public class RecalDataManager {
* @param read The SAMRecord to parse
* @return whether or not this read should be skipped
*/
public static boolean checkColorSpace(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
@@ -378,99 +377,10 @@ public class RecalDataManager {
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
else
return false; // otherwise, just skip the read
return true; // otherwise, just skip the read
}
}
return true;
}
/**
* Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases
* This method doesn't add the inconsistent tag to the read like parseColorSpace does
*
* @param read The SAMRecord to parse
* @param originalQualScores The array of original quality scores to modify during the correction
* @param solidRecalMode Which mode of solid recalibration to apply
* @param refBases The reference for this read
* @return A new array of quality scores that have been ref bias corrected
*/
public static byte[] calcColorSpace(final GATKSAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final byte[] refBases) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) {
byte[] colorSpace;
if (attr instanceof String) {
colorSpace = ((String) attr).getBytes();
}
else {
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
// Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
byte[] readBases = read.getReadBases();
final byte[] colorImpliedBases = readBases.clone();
byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray(read.getCigar(), read.getReadBases(), refBases); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
if (read.getReadNegativeStrandFlag()) {
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
refBasesDirRead = BaseUtils.simpleReverseComplement(refBasesDirRead.clone());
}
final int[] inconsistency = new int[readBases.length];
byte prevBase = colorSpace[0]; // The sentinel
for (int iii = 0; iii < readBases.length; iii++) {
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
colorImpliedBases[iii] = thisBase;
inconsistency[iii] = (thisBase == readBases[iii] ? 0 : 1);
prevBase = readBases[iii];
}
// Now that we have the inconsistency array apply the desired correction to the inconsistent bases
if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO) { // Set inconsistent bases and the one before it to Q0
final boolean setBaseN = false;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
}
else if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N) {
final boolean setBaseN = true;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
}
else if (solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead);
}
}
else {
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
return originalQualScores;
}
public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
if (ReadUtils.isSOLiDRead(read)) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) {
byte[] colorSpace;
if (attr instanceof String) {
colorSpace = ((String) attr).substring(1).getBytes(); // trim off the Sentinel
}
else {
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
for (byte color : colorSpace) {
if (color != (byte) '0' && color != (byte) '1' && color != (byte) '2' && color != (byte) '3') {
return true; // There is a bad color in this SOLiD read and the user wants to skip over it
}
}
}
else {
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
}
return false; // There aren't any color no calls in this SOLiD read
return false;
}
/**
@@ -625,16 +535,16 @@ public class RecalDataManager {
* @param offset The offset in the read at which to check
* @return Returns true if the base was inconsistent with the color space
*/
public static boolean isInconsistentColorSpace(final GATKSAMRecord read, final int offset) {
public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
if (attr != null) {
final byte[] inconsistency = (byte[]) attr;
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
if (read.getReadNegativeStrandFlag()) { // Negative direction
return inconsistency[inconsistency.length - offset - 1] != (byte) 0;
return inconsistency[inconsistency.length - offset - 1] == (byte) 0;
}
else { // Forward direction
return inconsistency[offset] != (byte) 0;
return inconsistency[offset] == (byte) 0;
}
// This block of code is for if you want to check both the offset and the next base for color space inconsistency
@@ -654,7 +564,7 @@ public class RecalDataManager {
}
else { // No inconsistency array, so nothing is inconsistent
return false;
return true;
}
}
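The rename from isInconsistentColorSpace to isColorSpaceConsistent flips the boolean sense: a zero in the inconsistency array now means "consistent", and on the negative strand the array is indexed from the read's other end. A small sketch of that lookup, assuming the inconsistency array is stored in read direction as the comment above states:

public class ColorSpaceConsistencySketch {
    /**
     * Returns true when the base at offset (reference direction) is color-space consistent.
     * The inconsistency array is stored in read direction, so negative-strand reads index
     * from the opposite end, mirroring the updated isColorSpaceConsistent().
     */
    static boolean isColorSpaceConsistent(byte[] inconsistency, int offset, boolean negativeStrand) {
        if (inconsistency == null)
            return true; // no array -> nothing was flagged as inconsistent
        final int idx = negativeStrand ? inconsistency.length - offset - 1 : offset;
        return inconsistency[idx] == 0;
    }

    public static void main(String[] args) {
        byte[] inc = {0, 0, 1, 0};
        System.out.println(isColorSpaceConsistent(inc, 2, false)); // false: flagged base
        System.out.println(isColorSpaceConsistent(inc, 0, true));  // true: last array element is 0
    }
}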
@@ -33,12 +33,11 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class RecalDatum extends RecalDatumOptimized {
public class RecalDatum extends Datum {
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
//---------------------------------------------------------------------------------------------------------------
//
@@ -50,7 +49,7 @@ public class RecalDatum extends RecalDatumOptimized {
numObservations = 0L;
numMismatches = 0L;
estimatedQReported = 0.0;
empiricalQuality = 0.0;
empiricalQuality = -1.0;
}
public RecalDatum(final long _numObservations, final long _numMismatches, final double _estimatedQReported, final double _empiricalQuality) {
@@ -67,60 +66,52 @@ public class RecalDatum extends RecalDatumOptimized {
this.empiricalQuality = copy.empiricalQuality;
}
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
public final void combine(final RecalDatum other) {
public void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
this.increment(other.numObservations, other.numMismatches);
this.estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
this.empiricalQuality = -1.0; // reset the empirical quality calculation so we never have a wrongly calculated empirical quality stored
}
//---------------------------------------------------------------------------------------------------------------
//
// methods to derive empirical quality score
//
//---------------------------------------------------------------------------------------------------------------
public final void calcCombinedEmpiricalQuality(final int maxQual) {
this.empiricalQuality = empiricalQualDouble(SMOOTHING_CONSTANT, maxQual); // cache the value so we don't call log over and over again
public final void calcCombinedEmpiricalQuality() {
this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
}
public final void calcEstimatedReportedQuality() {
this.estimatedQReported = -10 * Math.log10(calcExpectedErrors() / numObservations);
}
//---------------------------------------------------------------------------------------------------------------
//
// misc. methods
//
//---------------------------------------------------------------------------------------------------------------
public final double getEstimatedQReported() {
return estimatedQReported;
}
public final double getEmpiricalQuality() {
if (empiricalQuality < 0)
calcCombinedEmpiricalQuality();
return empiricalQuality;
}
private double calcExpectedErrors() {
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
}
private double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual / -10.0);
}
/**
* Makes a hard copy of the recal datum element
*
* @return a new recal datum object with the same contents of this datum.
*/
protected RecalDatum copy() {
public RecalDatum copy() {
return new RecalDatum(numObservations, numMismatches, estimatedQReported, empiricalQuality);
}
@Override
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
}
private double calcExpectedErrors() {
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
}
private double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual / -10.0);
}
}
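RecalDatum.combine folds another datum into this one by summing expected error counts, re-deriving the estimated reported quality, and invalidating the cached empirical quality. A standalone sketch of that arithmetic, mirroring the combine()/calcExpectedErrors()/qualToErrorProb() bodies shown above:

public class CombineSketch {
    long numObservations;
    long numMismatches;
    double estimatedQReported;
    double empiricalQuality = -1.0; // -1 marks "not yet computed", as in the new constructor

    CombineSketch(long obs, long mismatches, double qReported) {
        numObservations = obs;
        numMismatches = mismatches;
        estimatedQReported = qReported;
    }

    static double qualToErrorProb(double qual) {
        return Math.pow(10.0, qual / -10.0);
    }

    double expectedErrors() {
        return numObservations * qualToErrorProb(estimatedQReported);
    }

    void combine(CombineSketch other) {
        final double sumErrors = this.expectedErrors() + other.expectedErrors();
        numObservations += other.numObservations;
        numMismatches += other.numMismatches;
        estimatedQReported = -10.0 * Math.log10(sumErrors / numObservations);
        empiricalQuality = -1.0; // reset so a stale cached value is never reported
    }

    public static void main(String[] args) {
        CombineSketch a = new CombineSketch(1000, 1, 30.0);
        CombineSketch b = new CombineSketch(1000, 10, 20.0);
        a.combine(b);
        System.out.printf("combined reported Q = %.2f over %d observations%n",
                a.estimatedQReported, a.numObservations);
    }
}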
@@ -30,7 +30,7 @@ import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils;
import java.io.PrintStream;
import java.io.File;
import java.util.Collections;
import java.util.List;
@@ -62,7 +62,7 @@ public class RecalibrationArgumentCollection {
*/
@Gather(BQSRGatherer.class)
@Output
public PrintStream RECAL_FILE;
public File RECAL_FILE;
/**
* List all implemented covariates.
@@ -18,13 +18,11 @@ import java.util.*;
*/
public class RecalibrationReport {
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
private LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
private final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
public RecalibrationReport(final File RECAL_FILE) {
GATKReport report = new GATKReport(RECAL_FILE);
@@ -53,6 +51,7 @@ public class RecalibrationReport {
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
table = parseReadGroupTable(keyManager, reportTable);
@@ -292,7 +291,7 @@ public class RecalibrationReport {
public void calculateEmpiricalAndQuantizedQualities() {
for (Map<BitSet, RecalDatum> table : keysAndTablesMap.values())
for (RecalDatum datum : table.values())
datum.calcCombinedEmpiricalQuality(QualityUtils.MAX_QUAL_SCORE);
datum.calcCombinedEmpiricalQuality();
quantizationInfo = new QuantizationInfo(keysAndTablesMap, RAC.QUANTIZING_LEVELS);
}
@@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.coverage;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.HashMap;
import java.util.Map;
@@ -45,8 +46,7 @@ public class DepthOfCoverageStats {
public static int[] calculateBinEndpoints(int lower, int upper, int bins) {
if ( bins > upper - lower || lower < 1 ) {
throw new IllegalArgumentException("Illegal argument to calculateBinEndpoints; "+
"lower bound must be at least 1, and number of bins may not exceed stop - start");
throw new UserException.BadInput("the start must be at least 1 and the number of bins may not exceed stop - start");
}
int[] binLeftEndpoints = new int[bins+1];
@@ -146,6 +146,10 @@ public class ConsensusAlleleCounter {
String indelString = p.getEventBases();
if ( p.isBeforeInsertion() ) {
// edge case: ignore a deletion immediately preceding an insertion as p.getEventBases() returns null [EB]
if ( indelString == null )
continue;
boolean foundKey = false;
// copy of hashmap into temp arrayList
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
@@ -50,7 +50,7 @@ public class PairHMMIndelErrorModel {
public static final int BASE_QUAL_THRESHOLD = 20;
private boolean DEBUG = false;
private boolean bandedLikelihoods = true;
private boolean bandedLikelihoods = false;
private static final int MAX_CACHED_QUAL = 127;
@@ -91,7 +91,7 @@ public class PairHMMIndelErrorModel {
public PairHMMIndelErrorModel(byte indelGOP, byte indelGCP, boolean deb, boolean bandedLikelihoods) {
this.DEBUG = deb;
//this.bandedLikelihoods = bandedLikelihoods;
this.bandedLikelihoods = bandedLikelihoods;
// fill gap penalty table, affine naive model:
this.GAP_CONT_PROB_TABLE = new byte[MAX_HRUN_GAP_IDX];
@@ -157,219 +157,6 @@ public class PairHMMIndelErrorModel {
}
private static void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases,
byte[] currentGOP, byte[] currentGCP, double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {
if (indI > 0 && indJ > 0) {
final int im1 = indI -1;
final int jm1 = indJ - 1;
// update current point
final byte x = readBases[im1];
final byte y = haplotypeBases[jm1];
final byte qual = readQuals[im1] < 1 ? 1 : (readQuals[im1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[im1]);
final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
matchMetricArray[indI][indJ] = pBaseRead + MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[im1][jm1], XMetricArray[im1][jm1], YMetricArray[im1][jm1]});
final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGOP[im1]/10.0;
final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGCP[im1]/10.0;
XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
// update Y array
final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGOP[im1]/10.0;
final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : -(double)currentGCP[im1]/10.0;
YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
}
}
public static double computeReadLikehoodGivenHaplotype(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
byte[] currentGOP, byte[] currentGCP, boolean bandedLikelihoods) {
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions
final int X_METRIC_LENGTH = readBases.length + 1;
final int Y_METRIC_LENGTH = haplotypeBases.length + 1;
// initial arrays to hold the probabilities of being in the match, insertion and deletion cases
final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
return computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentGOP,
currentGCP, 0, matchMetricArray, XMetricArray, YMetricArray, bandedLikelihoods);
}
private static double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
byte[] currentGOP, byte[] currentGCP, int indToStart,
double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray,
boolean bandedLikelihoods) {
final int X_METRIC_LENGTH = readBases.length+1;
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
if (indToStart == 0) {
// default initialization for all arrays
for (int i=0; i < X_METRIC_LENGTH; i++) {
Arrays.fill(matchMetricArray[i],Double.NEGATIVE_INFINITY);
Arrays.fill(YMetricArray[i],Double.NEGATIVE_INFINITY);
Arrays.fill(XMetricArray[i],Double.NEGATIVE_INFINITY);
}
for (int i=1; i < X_METRIC_LENGTH; i++) {
//initialize first column
XMetricArray[i][0] = END_GAP_COST*(i);
}
for (int j=1; j < Y_METRIC_LENGTH; j++) {
// initialize first row
YMetricArray[0][j] = END_GAP_COST*(j);
}
matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
XMetricArray[0][0]= YMetricArray[0][0] = 0;
}
if (bandedLikelihoods) {
final double DIAG_TOL = 20; // means that max - min element in diags have to be > this number for banding to take effect.
final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH -1;
final int elemsInDiag = Math.min(X_METRIC_LENGTH, Y_METRIC_LENGTH);
int idxWithMaxElement = 0;
for (int diag=indToStart; diag < numDiags; diag++) {
// compute default I and J start positions at edge of diagonals
int indI = 0;
int indJ = diag;
if (diag >= Y_METRIC_LENGTH ) {
indI = diag-(Y_METRIC_LENGTH-1);
indJ = Y_METRIC_LENGTH-1;
}
// first pass: from max element to edge
int idxLow = idxWithMaxElement;
// reset diag max value before starting
double maxElementInDiag = Double.NEGATIVE_INFINITY;
// set indI, indJ to correct values
indI += idxLow;
indJ -= idxLow;
if (indI >= X_METRIC_LENGTH || indJ < 0) {
idxLow--;
indI--;
indJ++;
}
for (int el = idxLow; el < elemsInDiag; el++) {
updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
// update max in diagonal
final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
// check if we've fallen off diagonal value by threshold
if (bestMetric > maxElementInDiag) {
maxElementInDiag = bestMetric;
idxWithMaxElement = el;
}
else if (bestMetric < maxElementInDiag - DIAG_TOL && idxWithMaxElement > 0)
break; // done w/current diagonal
indI++;
if (indI >=X_METRIC_LENGTH )
break;
indJ--;
if (indJ <= 0)
break;
}
if (idxLow > 0) {
// now do second part in opposite direction
indI = 0;
indJ = diag;
if (diag >= Y_METRIC_LENGTH ) {
indI = diag-(Y_METRIC_LENGTH-1);
indJ = Y_METRIC_LENGTH-1;
}
indI += idxLow-1;
indJ -= idxLow-1;
for (int el = idxLow-1; el >= 0; el--) {
updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
// update max in diagonal
final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
// check if we've fallen off diagonal value by threshold
if (bestMetric > maxElementInDiag) {
maxElementInDiag = bestMetric;
idxWithMaxElement = el;
}
else if (bestMetric < maxElementInDiag - DIAG_TOL)
break; // done w/current diagonal
indJ++;
if (indJ >= Y_METRIC_LENGTH )
break;
indI--;
if (indI <= 0)
break;
}
}
// if (DEBUG)
// System.out.format("Max:%4.1f el:%d\n",maxElementInDiag, idxWithMaxElement);
}
}
else {
// simplified rectangular version of update loop
for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
for (int indJ=indToStart+1; indJ < Y_METRIC_LENGTH; indJ++) {
updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
}
}
}
final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
final double bestMetric = MathUtils.approximateLog10SumLog10(new double[]{ matchMetricArray[bestI][bestJ], XMetricArray[bestI][bestJ], YMetricArray[bestI][bestJ] });
/*
if (DEBUG) {
PrintStream outx, outy, outm, outs;
double[][] sumMetrics = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
try {
outx = new PrintStream("datax.txt");
outy = new PrintStream("datay.txt");
outm = new PrintStream("datam.txt");
outs = new PrintStream("datas.txt");
double metrics[] = new double[3];
for (int indI=0; indI < X_METRIC_LENGTH; indI++) {
for (int indJ=0; indJ < Y_METRIC_LENGTH; indJ++) {
metrics[0] = matchMetricArray[indI][indJ];
metrics[1] = XMetricArray[indI][indJ];
metrics[2] = YMetricArray[indI][indJ];
//sumMetrics[indI][indJ] = MathUtils.softMax(metrics);
outx.format("%4.1f ", metrics[1]);
outy.format("%4.1f ", metrics[2]);
outm.format("%4.1f ", metrics[0]);
outs.format("%4.1f ", MathUtils.softMax(metrics));
}
outx.println(); outm.println();outy.println(); outs.println();
}
outm.close(); outx.close(); outy.close();
} catch (java.io.IOException e) { throw new UserException("bla");}
}
*/
return bestMetric;
}
private void fillGapProbabilities(final int[] hrunProfile,
final byte[] contextLogGapOpenProbabilities,
final byte[] contextLogGapContinuationProbabilities) {
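The block deleted above duplicated the PairHMM affine-gap recurrences inside PairHMMIndelErrorModel; each cell combines the match, X (read insertion) and Y (read deletion) matrices in log10 space. A compact sketch of that per-cell update, using an exact log10-sum in place of MathUtils.approximateLog10SumLog10 and flat gap penalties where the original looks up per-base GOP/GCP values:

public class PairHmmCellSketch {
    // Exact log10(10^a + 10^b + 10^c); the original uses an approximation for speed.
    static double log10Sum(double a, double b, double c) {
        double m = Math.max(a, Math.max(b, c));
        if (m == Double.NEGATIVE_INFINITY) return m;
        return m + Math.log10(Math.pow(10, a - m) + Math.pow(10, b - m) + Math.pow(10, c - m));
    }

    static double log10Sum(double a, double b) {
        double m = Math.max(a, b);
        if (m == Double.NEGATIVE_INFINITY) return m;
        return m + Math.log10(Math.pow(10, a - m) + Math.pow(10, b - m));
    }

    /**
     * One cell of the affine-gap update, mirroring updateCell() above:
     *   M[i][j] = p(read_i, hap_j) + log10sum(M[i-1][j-1], X[i-1][j-1], Y[i-1][j-1])
     *   X[i][j] = log10sum(M[i-1][j] + gapOpen, X[i-1][j] + gapCont)
     *   Y[i][j] = log10sum(M[i][j-1] + gapOpen, Y[i][j-1] + gapCont)
     * gapOpen/gapCont are flat log10 penalties here; the original also substitutes END_GAP_COST
     * at the matrix edges.
     */
    static void updateCell(int i, int j, double pBase, double gapOpen, double gapCont,
                           double[][] M, double[][] X, double[][] Y) {
        M[i][j] = pBase + log10Sum(M[i - 1][j - 1], X[i - 1][j - 1], Y[i - 1][j - 1]);
        X[i][j] = log10Sum(M[i - 1][j] + gapOpen, X[i - 1][j] + gapCont);
        Y[i][j] = log10Sum(M[i][j - 1] + gapOpen, Y[i][j - 1] + gapCont);
    }

    public static void main(String[] args) {
        int n = 3;
        double[][] M = new double[n][n], X = new double[n][n], Y = new double[n][n];
        for (double[][] table : new double[][][]{M, X, Y})
            for (double[] row : table) java.util.Arrays.fill(row, Double.NEGATIVE_INFINITY);
        M[0][0] = 0.0; // trivial initial condition for the sketch
        updateCell(1, 1, -0.001, -4.0, -1.0, M, X, Y);
        System.out.printf("M=%.3f X=%.3f Y=%.3f%n", M[1][1], X[1][1], Y[1][1]);
    }
}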
@@ -598,8 +385,8 @@ public class PairHMMIndelErrorModel {
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
(int)indStart, (int)indStop);
final int X_METRIC_LENGTH = readBases.length+1;
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
final int X_METRIC_LENGTH = readBases.length+2;
final int Y_METRIC_LENGTH = haplotypeBases.length+2;
if (matchMetricArray == null) {
//no need to reallocate arrays for each new haplotype, as length won't change
@@ -611,21 +398,10 @@ public class PairHMMIndelErrorModel {
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
/*
if (previousHaplotypeSeen == null)
startIndexInHaplotype = 0;
else
startIndexInHaplotype = 0; //computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
previousHaplotypeSeen = haplotypeBases.clone();
*/
readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals,
contextLogGapOpenProbabilities, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities,
startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);
double l2 = computeReadLikehoodGivenHaplotype(haplotypeBases, readBases, readQuals, contextLogGapOpenProbabilities,
contextLogGapContinuationProbabilities, bandedLikelihoods);
if (DEBUG) {
System.out.println("H:"+new String(haplotypeBases));
System.out.println("R:"+new String(readBases));
@@ -35,16 +35,14 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
/**
@@ -86,8 +84,8 @@ import java.util.*;
*
*/
@PartitionBy(PartitionType.NONE)
public class ApplyRecalibration extends RodWalker<Integer, Integer> {
@PartitionBy(PartitionType.LOCUS)
public class ApplyRecalibration extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
/////////////////////////////
// Inputs
@@ -98,9 +96,9 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
@Input(fullName="input", shortName = "input", doc="The raw input variants to be recalibrated", required=true)
public List<RodBinding<VariantContext>> input;
@Input(fullName="recal_file", shortName="recalFile", doc="The input recal file used by ApplyRecalibration", required=true)
private File RECAL_FILE;
protected RodBinding<VariantContext> recal;
@Input(fullName="tranches_file", shortName="tranchesFile", doc="The input tranches file describing where to cut the data", required=true)
private File TRANCHES_FILE;
protected File TRANCHES_FILE;
/////////////////////////////
// Outputs
@@ -112,7 +110,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
// Command Line Arguments
/////////////////////////////
@Argument(fullName="ts_filter_level", shortName="ts_filter_level", doc="The truth sensitivity level at which to start filtering", required=false)
private double TS_FILTER_LEVEL = 99.0;
protected double TS_FILTER_LEVEL = 99.0;
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
private String[] IGNORE_INPUT_FILTERS = null;
@Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously.", required = false)
@@ -123,8 +121,6 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
/////////////////////////////
final private List<Tranche> tranches = new ArrayList<Tranche>();
final private Set<String> inputNames = new HashSet<String>();
final private NestedHashMap lodMap = new NestedHashMap();
final private NestedHashMap annotationMap = new NestedHashMap();
final private Set<String> ignoreInputFilterSet = new TreeSet<String>();
//---------------------------------------------------------------------------------------------------------------
@@ -174,20 +170,6 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);
try {
logger.info("Reading in recalibration table...");
for ( final String line : new XReadLines( RECAL_FILE ) ) {
final String[] vals = line.split(",");
lodMap.put( Double.parseDouble(vals[3]), vals[0], Integer.parseInt(vals[1]), Integer.parseInt(vals[2]) ); // value comes before the keys
annotationMap.put( vals[4], vals[0], Integer.parseInt(vals[1]), Integer.parseInt(vals[2]) ); // value comes before the keys
}
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(RECAL_FILE, e);
} catch ( Exception e ) {
throw new UserException.MalformedFile(RECAL_FILE, "Could not parse LOD and annotation information in input recal file. File is somehow malformed.");
}
}
//---------------------------------------------------------------------------------------------------------------
@@ -202,52 +184,75 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
return 1;
}
for( VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
if( vc != null ) {
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
VariantContextBuilder builder = new VariantContextBuilder(vc);
String filterString = null;
final List<VariantContext> VCs = tracker.getValues(input, context.getLocation());
final List<VariantContext> recals = tracker.getValues(recal, context.getLocation());
final Double lod = (Double) lodMap.get( vc.getChr(), vc.getStart(), vc.getEnd() );
final String worstAnnotation = (String) annotationMap.get( vc.getChr(), vc.getStart(), vc.getEnd() );
if( lod == null ) {
throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc );
}
for( final VariantContext vc : VCs ) {
// Annotate the new record with its VQSLOD and the worst performing annotation
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod));
builder.attribute(VariantRecalibrator.CULPRIT_KEY, worstAnnotation);
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
for( int i = tranches.size() - 1; i >= 0; i-- ) {
final Tranche tranche = tranches.get(i);
if( lod >= tranche.minVQSLod ) {
if( i == tranches.size() - 1 ) {
filterString = VCFConstants.PASSES_FILTERS_v4;
} else {
filterString = tranche.name;
}
break;
}
}
if( filterString == null ) {
filterString = tranches.get(0).name+"+";
}
if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
builder.filters(filterString);
}
vcfWriter.add( builder.make() );
} else { // valid VC but not compatible with this mode, so just emit the variant untouched
vcfWriter.add( vc );
final VariantContext recalDatum = getMatchingRecalVC(vc, recals);
if( recalDatum == null ) {
throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc );
}
final String lodString = recalDatum.getAttributeAsString(VariantRecalibrator.VQS_LOD_KEY, null);
if( lodString == null ) {
throw new UserException("Encountered a malformed record in the input recal file. There is no lod for the record at: " + vc );
}
final double lod;
try {
lod = Double.valueOf(lodString);
} catch (NumberFormatException e) {
throw new UserException("Encountered a malformed record in the input recal file. The lod is unreadable for the record at: " + vc );
}
VariantContextBuilder builder = new VariantContextBuilder(vc);
String filterString = null;
// Annotate the new record with its VQSLOD and the worst performing annotation
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lodString); // use the String representation so that we don't lose precision on output
builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY));
for( int i = tranches.size() - 1; i >= 0; i-- ) {
final Tranche tranche = tranches.get(i);
if( lod >= tranche.minVQSLod ) {
if( i == tranches.size() - 1 ) {
filterString = VCFConstants.PASSES_FILTERS_v4;
} else {
filterString = tranche.name;
}
break;
}
}
if( filterString == null ) {
filterString = tranches.get(0).name+"+";
}
if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
builder.filters(filterString);
}
vcfWriter.add( builder.make() );
} else { // valid VC but not compatible with this mode, so just emit the variant untouched
vcfWriter.add( vc );
}
}
return 1; // This value isn't used for anything
}
private static VariantContext getMatchingRecalVC(final VariantContext target, final List<VariantContext> recalVCs) {
for( final VariantContext recalVC : recalVCs ) {
if ( target.getEnd() == recalVC.getEnd() ) {
return recalVC;
}
}
return null;
}
//---------------------------------------------------------------------------------------------------------------
//
// reduce
@@ -262,6 +267,10 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
return 1; // This value isn't used for anything
|
||||
}
|
||||
|
||||
public Integer treeReduce( final Integer lhs, final Integer rhs ) {
|
||||
return 1; // This value isn't used for anything
|
||||
}
|
||||
|
||||
public void onTraversalDone( final Integer reduceSum ) {
|
||||
}
|
||||
}
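
The filter-assignment loop in the hunk above reduces to a small piece of logic. The sketch below is illustrative only, not the walker itself: the helper name and the minimal Tranche stand-in are hypothetical, "PASS" stands in for VCFConstants.PASSES_FILTERS_v4, and the tranche list is assumed sorted so that the strictest tranche (highest minVQSLod) comes last.

import java.util.List;

final class TrancheFilterSketch {
    // Minimal stand-in for the GATK Tranche class; only the two fields the hunk reads.
    static final class Tranche {
        final String name;
        final double minVQSLod;
        Tranche(final String name, final double minVQSLod) { this.name = name; this.minVQSLod = minVQSLod; }
    }

    // Mirrors the loop above: scan from the strictest tranche (last element) down;
    // meeting the top tranche yields PASS, meeting a lower tranche yields that
    // tranche's name, and a lod below every tranche is tagged "<loosest tranche>+".
    static String filterStringFor(final double lod, final List<Tranche> tranches) {
        for (int i = tranches.size() - 1; i >= 0; i--) {
            final Tranche tranche = tranches.get(i);
            if (lod >= tranche.minVQSLod) {
                return i == tranches.size() - 1 ? "PASS" : tranche.name;
            }
        }
        return tranches.get(0).name + "+";
    }
}
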
|
||||
|
|
|
|||
|
|
@@ -30,14 +30,16 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@@ -285,11 +287,28 @@ public class VariantDataManager {
|
|||
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples());
|
||||
}
|
||||
|
||||
public void writeOutRecalibrationTable( final PrintStream RECAL_FILE ) {
|
||||
public void writeOutRecalibrationTable( final VCFWriter recalWriter ) {
|
||||
// we need to sort in coordinate order in order to produce a valid VCF
|
||||
Collections.sort( data, new Comparator<VariantDatum>() {
|
||||
public int compare(VariantDatum vd1, VariantDatum vd2) {
|
||||
return vd1.loc.compareTo(vd2.loc);
|
||||
}} );
|
||||
|
||||
// create dummy alleles to be used
|
||||
final List<Allele> alleles = new ArrayList<Allele>(2);
|
||||
alleles.add(Allele.create("N", true));
|
||||
alleles.add(Allele.create("<VQSR>", false));
|
||||
|
||||
// to be used for the important INFO tags
|
||||
final HashMap<String, Object> attributes = new HashMap<String, Object>(3);
|
||||
|
||||
for( final VariantDatum datum : data ) {
|
||||
RECAL_FILE.println(String.format("%s,%d,%d,%.4f,%s",
|
||||
datum.contig, datum.start, datum.stop, datum.lod,
|
||||
(datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL")));
|
||||
attributes.put(VCFConstants.END_KEY, datum.loc.getStop());
|
||||
attributes.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", datum.lod));
|
||||
attributes.put(VariantRecalibrator.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getStart(), alleles).attributes(attributes);
|
||||
recalWriter.add(builder.make());
|
||||
}
|
||||
}
|
||||
}
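
With this hunk the recalibration table is emitted as sorted VCF records rather than comma-separated lines: each VariantDatum becomes a site-only record with a symbolic <VQSR> alternate allele and the LOD and worst annotation carried in INFO. The line below is an invented illustration of such a record; the position and values are made up, and the INFO key spellings assume the usual values of VCFConstants.END_KEY, VQS_LOD_KEY and CULPRIT_KEY.

20	1000123	.	N	<VQSR>	.	.	END=1000123;VQSLOD=5.2140;culprit=HaplotypeScore
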
|
||||
|
|
|
|||
|
|
@@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
@@ -46,9 +48,7 @@ public class VariantDatum implements Comparable<VariantDatum> {
|
|||
public double originalQual;
|
||||
public double prior;
|
||||
public int consensusCount;
|
||||
public String contig;
|
||||
public int start;
|
||||
public int stop;
|
||||
public GenomeLoc loc;
|
||||
public int worstAnnotation;
|
||||
public MultivariateGaussian assignment; // used in K-means implementation
|
||||
|
||||
|
|
|
|||
|
|
@@ -37,6 +37,8 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.io.Resource;
|
||||
|
|
@@ -136,9 +138,11 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
// Outputs
|
||||
/////////////////////////////
|
||||
@Output(fullName="recal_file", shortName="recalFile", doc="The output recal file used by ApplyRecalibration", required=true)
|
||||
private PrintStream RECAL_FILE;
|
||||
protected File recalFile = null;
|
||||
protected StandardVCFWriter recalWriter = null;
|
||||
|
||||
@Output(fullName="tranches_file", shortName="tranchesFile", doc="The output tranches file used by ApplyRecalibration", required=true)
|
||||
private File TRANCHES_FILE;
|
||||
protected File TRANCHES_FILE;
|
||||
|
||||
/////////////////////////////
|
||||
// Additional Command Line Arguments
|
||||
|
|
@@ -150,7 +154,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
* that this parameter is used for display purposes only and isn't used anywhere in the algorithm!
|
||||
*/
|
||||
@Argument(fullName="target_titv", shortName="titv", doc="The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on the optimization curve output figures. (approx 2.15 for whole genome experiments). ONLY USED FOR PLOTTING PURPOSES!", required=false)
|
||||
private double TARGET_TITV = 2.15;
|
||||
protected double TARGET_TITV = 2.15;
|
||||
|
||||
/**
|
||||
* See the input VCF file's INFO field for a list of all available annotations.
|
||||
|
|
@@ -170,7 +174,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
@Output(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
|
||||
private File RSCRIPT_FILE = null;
|
||||
@Argument(fullName="ts_filter_level", shortName="ts_filter_level", doc="The truth sensitivity level at which to start filtering, used here to indicate filtered variants in the model reporting plots", required=false)
|
||||
private double TS_FILTER_LEVEL = 99.0;
|
||||
protected double TS_FILTER_LEVEL = 99.0;
|
||||
|
||||
/////////////////////////////
|
||||
// Debug Arguments
|
||||
|
|
@@ -224,6 +228,10 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
if( !dataManager.checkHasTruthSet() ) {
|
||||
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||
}
|
||||
|
||||
final VCFHeader vcfHeader = new VCFHeader( null, Collections.<String>emptySet() );
|
||||
recalWriter = new StandardVCFWriter(recalFile, getMasterSequenceDictionary(), false);
|
||||
recalWriter.writeHeader(vcfHeader);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@@ -246,9 +254,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
|
||||
// Populate the datum with lots of fields from the VariantContext, unfortunately the VC is too big so we just pull in only the things we absolutely need.
|
||||
dataManager.decodeAnnotations( datum, vc, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine
|
||||
datum.contig = vc.getChr();
|
||||
datum.start = vc.getStart();
|
||||
datum.stop = vc.getEnd();
|
||||
datum.loc = getToolkit().getGenomeLocParser().createGenomeLoc(vc);
|
||||
datum.originalQual = vc.getPhredScaledQual();
|
||||
datum.isSNP = vc.isSNP() && vc.isBiallelic();
|
||||
datum.isTransition = datum.isSNP && VariantContextUtils.isTransition(vc);
|
||||
|
|
@@ -345,7 +351,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
}
|
||||
|
||||
logger.info( "Writing out recalibration table..." );
|
||||
dataManager.writeOutRecalibrationTable( RECAL_FILE );
|
||||
dataManager.writeOutRecalibrationTable( recalWriter );
|
||||
if( RSCRIPT_FILE != null ) {
|
||||
logger.info( "Writing out visualization Rscript file...");
|
||||
createVisualizationScript( dataManager.getRandomDataForPlotting( 6000 ), goodModel, badModel, lodCutoff );
|
||||
|
|
|
|||
|
|
@@ -106,6 +106,10 @@ public class MathUtils {
|
|||
return approxSum;
|
||||
}
|
||||
|
||||
public static double approximateLog10SumLog10(double a, double b, double c) {
|
||||
return approximateLog10SumLog10(a, approximateLog10SumLog10(b, c));
|
||||
}
|
||||
|
||||
public static double approximateLog10SumLog10(double small, double big) {
|
||||
// make sure small is really the smaller value
|
||||
if (small > big) {
|
||||
|
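
The new three-argument approximateLog10SumLog10 simply nests the pairwise version. As a reference for what that pairwise sum computes, here is a minimal exact sketch; it is not GATK's implementation, which may approximate the correction term (for example with a cached table).

final class Log10SumSketch {
    // log10(10^a + 10^b), computed stably by factoring out the larger term.
    static double log10SumLog10(final double a, final double b) {
        final double big = Math.max(a, b);
        final double small = Math.min(a, b);
        return big + Math.log10(1.0 + Math.pow(10.0, small - big));
    }

    // The three-argument overload added above reduces to nesting the pairwise call.
    static double log10SumLog10(final double a, final double b, final double c) {
        return log10SumLog10(a, log10SumLog10(b, c));
    }
}
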
|
|
|||
|
|
@@ -71,9 +71,9 @@ public class PairHMM {
|
|||
public double computeReadLikelihoodGivenHaplotype( final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals,
|
||||
final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP ) {
|
||||
|
||||
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions
|
||||
final int X_METRIC_LENGTH = readBases.length + 1;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length + 1;
|
||||
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
|
||||
final int X_METRIC_LENGTH = readBases.length + 2;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
|
||||
|
||||
// initial arrays to hold the probabilities of being in the match, insertion and deletion cases
|
||||
final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
|
|
@@ -91,9 +91,14 @@ public class PairHMM {
|
|||
final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP, final int hapStartIndex,
|
||||
final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
|
||||
|
||||
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions
|
||||
final int X_METRIC_LENGTH = readBases.length + 1;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length + 1;
|
||||
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
|
||||
final int X_METRIC_LENGTH = readBases.length + 2;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length + 2;
|
||||
|
||||
// ensure that all the qual scores have valid values
|
||||
for( int iii = 0; iii < readQuals.length; iii++ ) {
|
||||
readQuals[iii] = ( readQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : (readQuals[iii] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[iii]) );
|
||||
}
|
||||
|
||||
if( doBanded ) {
|
||||
final ArrayList<Integer> workQueue = new ArrayList<Integer>(); // holds a queue of starting work location (indices along the diagonal). Will be sorted each step
|
||||
|
|
@@ -205,7 +210,7 @@ public class PairHMM {
|
|||
// final probability is the log10 sum of the last element in all three state arrays
|
||||
final int endI = X_METRIC_LENGTH - 1;
|
||||
final int endJ = Y_METRIC_LENGTH - 1;
|
||||
return MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]});
|
||||
return MathUtils.approximateLog10SumLog10(matchMetricArray[endI][endJ], XMetricArray[endI][endJ], YMetricArray[endI][endJ]);
|
||||
}
|
||||
|
||||
private void updateCell( final int indI, final int indJ, final byte[] haplotypeBases, final byte[] readBases,
|
||||
|
|
@@ -221,14 +226,13 @@ public class PairHMM {
|
|||
if( im1 > 0 && jm1 > 0 ) { // the emission probability is applied when leaving the state
|
||||
final byte x = readBases[im1-1];
|
||||
final byte y = haplotypeBases[jm1-1];
|
||||
final byte qual = ( readQuals[im1-1] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : (readQuals[im1-1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[im1-1]) );
|
||||
final byte qual = readQuals[im1-1];
|
||||
pBaseReadLog10 = ( x == y || x == (byte) 'N' || y == (byte) 'N' ? QualityUtils.qualToProbLog10(qual) : QualityUtils.qualToErrorProbLog10(qual) );
|
||||
}
|
||||
final int qualIndexGOP = ( im1 == 0 ? DEFAULT_GOP + DEFAULT_GOP : ( insertionGOP[im1-1] + deletionGOP[im1-1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : insertionGOP[im1-1] + deletionGOP[im1-1]) );
|
||||
final double d0 = QualityUtils.qualToProbLog10((byte)qualIndexGOP);
|
||||
final double e0 = ( im1 == 0 ? QualityUtils.qualToProbLog10(DEFAULT_GCP) : QualityUtils.qualToProbLog10(overallGCP[im1-1]) );
|
||||
matchMetricArray[indI][indJ] = pBaseReadLog10 + MathUtils.approximateLog10SumLog10(
|
||||
new double[]{matchMetricArray[indI-1][indJ-1] + d0, XMetricArray[indI-1][indJ-1] + e0, YMetricArray[indI-1][indJ-1] + e0});
|
||||
matchMetricArray[indI][indJ] = pBaseReadLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI-1][indJ-1] + d0, XMetricArray[indI-1][indJ-1] + e0, YMetricArray[indI-1][indJ-1] + e0);
|
||||
|
||||
// update the X (insertion) array
|
||||
final double d1 = ( im1 == 0 ? QualityUtils.qualToErrorProbLog10(DEFAULT_GOP) : QualityUtils.qualToErrorProbLog10(insertionGOP[im1-1]) );
|
||||
|
|
@@ -237,8 +241,8 @@ public class PairHMM {
|
|||
XMetricArray[indI][indJ] = qBaseReadLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI-1][indJ] + d1, XMetricArray[indI-1][indJ] + e1);
|
||||
|
||||
// update the Y (deletion) array, with penalty of zero on the left and right flanks to allow for a local alignment within the haplotype
|
||||
final double d2 = ( im1 == 0 || im1 == readBases.length - 1 ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]) );
|
||||
final double e2 = ( im1 == 0 || im1 == readBases.length - 1 ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
|
||||
final double d2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(deletionGOP[im1-1]) );
|
||||
final double e2 = ( im1 == 0 || im1 == readBases.length ? 0.0 : QualityUtils.qualToErrorProbLog10(overallGCP[im1-1]) );
|
||||
final double qBaseRefLog10 = 0.0; // Math.log10(1.0) -- we don't have an estimate for this emission probability so assume q=1.0
|
||||
YMetricArray[indI][indJ] = qBaseRefLog10 + MathUtils.approximateLog10SumLog10(matchMetricArray[indI][indJ-1] + d2, YMetricArray[indI][indJ-1] + e2);
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -73,6 +73,8 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature
|
|||
} catch ( UserException.MalformedGenomeLoc e ) {
|
||||
Utils.warnUser("RefSeq file is potentially incorrect, as some transcripts or exons have a negative length ("+fields[2]+")");
|
||||
return null;
|
||||
} catch ( NumberFormatException e ) {
|
||||
throw new UserException.MalformedFile("Could not parse location from line: " + line);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -473,7 +473,12 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
|||
protected static Allele oneAllele(String index, List<Allele> alleles) {
|
||||
if ( index.equals(VCFConstants.EMPTY_ALLELE) )
|
||||
return Allele.NO_CALL;
|
||||
int i = Integer.valueOf(index);
|
||||
final int i;
|
||||
try {
|
||||
i = Integer.valueOf(index);
|
||||
} catch ( NumberFormatException e ) {
|
||||
throw new TribbleException.InternalCodecException("The following invalid GT allele index was encountered in the file: " + index);
|
||||
}
|
||||
if ( i >= alleles.size() )
|
||||
throw new TribbleException.InternalCodecException("The allele with index " + index + " is not defined in the REF/ALT columns in the record");
|
||||
return alleles.get(i);
|
||||
|
|
|
|||
|
|
@@ -73,7 +73,7 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
}
|
||||
|
||||
public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip) {
|
||||
this(read,offset, isDeletion, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, -1);
|
||||
this(read, offset, isDeletion, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, -1);
|
||||
}
|
||||
public boolean isDeletion() {
|
||||
return isDeletion;
|
||||
|
|
|
|||
|
|
@@ -51,6 +51,7 @@ public class BaseRecalibration {
|
|||
* Constructor using a GATK Report file
|
||||
*
|
||||
* @param RECAL_FILE a GATK Report file containing the recalibration information
|
||||
* @param quantizationLevels number of bins to quantize the quality scores
|
||||
*/
|
||||
public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) {
|
||||
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
|
||||
|
|
@@ -80,7 +81,7 @@ public class BaseRecalibration {
|
|||
for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read
|
||||
byte qualityScore = originalQuals[offset];
|
||||
|
||||
if (qualityScore > QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
if (qualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
|
||||
qualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -381,15 +381,19 @@ public class AlignmentUtils {
|
|||
return alignment;
|
||||
}
|
||||
|
||||
public static int calcAlignmentByteArrayOffset(final Cigar cigar, PileupElement pileup, final int alignmentStart, final int refLocus) {
|
||||
int pileupOffset = pileup.getOffset();
|
||||
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) {
|
||||
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus );
|
||||
}
|
||||
|
||||
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final int offset, final boolean isInsertionAtBeginningOfRead, final boolean isDeletion, final int alignmentStart, final int refLocus) {
|
||||
int pileupOffset = offset;
|
||||
|
||||
// Special case for reads starting with insertion
|
||||
if (pileup.isInsertionAtBeginningOfRead())
|
||||
if (isInsertionAtBeginningOfRead)
|
||||
return 0;
|
||||
|
||||
// Reassign the offset if we are in the middle of a deletion because of the modified representation of the read bases
|
||||
if (pileup.isDeletion()) {
|
||||
if (isDeletion) {
|
||||
pileupOffset = refLocus - alignmentStart;
|
||||
final CigarElement ce = cigar.getCigarElement(0);
|
||||
if (ce.getOperator() == CigarOperator.S) {
|
||||
|
|
@@ -414,7 +418,7 @@ public class AlignmentUtils {
|
|||
break;
|
||||
case D:
|
||||
case N:
|
||||
if (!pileup.isDeletion()) {
|
||||
if (!isDeletion) {
|
||||
alignmentPos += elementLength;
|
||||
} else {
|
||||
if (pos + elementLength - 1 >= pileupOffset) {
|
||||
|
|
|
|||
|
|
@@ -335,10 +335,11 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
/**
|
||||
* Clears all attributes except ReadGroup of the read.
|
||||
*/
|
||||
public void simplify () {
|
||||
public GATKSAMRecord simplify () {
|
||||
GATKSAMReadGroupRecord rg = getReadGroup();
|
||||
this.clearAttributes();
|
||||
setReadGroup(rg);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@@ -53,6 +53,17 @@ public class BQSRKeyManagerUnitTest {
|
|||
createReadAndTest(covariates, nRequired);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testOneCovariateWithOptionalCovariates() {
|
||||
final int nRequired = 1;
|
||||
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(4);
|
||||
covariates.add(new ReadGroupCovariate());
|
||||
covariates.add(new QualityScoreCovariate());
|
||||
covariates.add(new CycleCovariate());
|
||||
covariates.add(new ContextCovariate());
|
||||
createReadAndTest(covariates, nRequired);
|
||||
}
|
||||
|
||||
private void createReadAndTest(List<Covariate> covariates, int nRequired) {
|
||||
int readLength = 1000;
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(ReadUtils.createRandomReadBases(readLength, true), ReadUtils.createRandomReadQuals(readLength), readLength + "M");
|
||||
|
|
|
|||
|
|
@@ -66,6 +66,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadRead() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH -I " + validationDataLocation + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("7678827a2ee21870a41c09d28d26b996"));
|
||||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing compressed output
|
||||
|
|
|
|||
|
|
@@ -27,7 +27,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
|
||||
"58780f63182e139fdbe17f6c18b5b774", // recal file
|
||||
"f8e21a1987960b950db1f0d98be45352", // recal file
|
||||
"f67d844b6252a55452cf4167b77530b1"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
|
|
@@ -74,7 +74,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"6d7ee4cb651c8b666e4a4523363caaff", // tranches
|
||||
"4759b111a5aa53975d46e0f22c7983bf", // recal file
|
||||
"ee5b408c8434a594496118875690c438", // recal file
|
||||
"5d7e07d8813db96ba3f3dfe4737f83d1"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRIndelTest")
|
||||
|
|
@@ -118,5 +118,21 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
Arrays.asList(params.cutVCFMD5));
|
||||
executeTest("testApplyRecalibrationIndel-"+params.inVCF, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testApplyRecalibrationSnpAndIndelTogether() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b37KGReference +
|
||||
" -T ApplyRecalibration" +
|
||||
" -L 20:1000100-1000500" +
|
||||
" -mode BOTH" +
|
||||
" -NO_HEADER" +
|
||||
" -input " + validationDataLocation + "VQSR.mixedTest.input" +
|
||||
" -o %s" +
|
||||
" -tranchesFile " + validationDataLocation + "VQSR.mixedTest.tranches" +
|
||||
" -recalFile " + validationDataLocation + "VQSR.mixedTest.recal",
|
||||
Arrays.asList("08060b7f5c9cf3bb1692b50c58fd5a4b"));
|
||||
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -271,6 +271,19 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
|
||||
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0, -2.5), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5, -1.1), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1, 0.5), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2, 1.3), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2, 18.1), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2, 26.6), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1, -45.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6, -26.2), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456, -0.34567), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
|
||||
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101, -17.9341), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@@ -194,9 +194,9 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
// context on either side is ACGTTGCA REF ACGTTGCA
|
||||
// test all combinations
|
||||
final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30, 40, 50) : Arrays.asList(30);
|
||||
final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30, 40, 50) : Arrays.asList(40);
|
||||
final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(20, 30, 40, 50) : Arrays.asList(40);
|
||||
final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 20, 30) : Arrays.asList(10);
|
||||
final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,6,7,8,9,10,20) : Arrays.asList(2);
|
||||
final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,7,8,9,10,20) : Arrays.asList(2);
|
||||
|
||||
for ( final int baseQual : baseQuals ) {
|
||||
for ( final int indelQual : indelQuals ) {
|
||||
|
|
@@ -253,7 +253,7 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
final List<Integer> baseQuals = EXTENSIVE_TESTING ? Arrays.asList(25, 30, 40, 50) : Arrays.asList(30);
|
||||
final List<Integer> indelQuals = EXTENSIVE_TESTING ? Arrays.asList(30, 40, 50) : Arrays.asList(40);
|
||||
final List<Integer> gcps = EXTENSIVE_TESTING ? Arrays.asList(10, 12) : Arrays.asList(10);
|
||||
final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,6,7,8,9,10,20) : Arrays.asList(2);
|
||||
final List<Integer> sizes = EXTENSIVE_TESTING ? Arrays.asList(2,3,4,5,7,8,9,10,20) : Arrays.asList(2);
|
||||
|
||||
for ( final int baseQual : baseQuals ) {
|
||||
for ( final int indelQual : indelQuals ) {
|
||||
|
|
@@ -302,4 +302,66 @@ public class PairHMMUnitTest extends BaseTest {
|
|||
logger.warn(String.format("Test: logL calc=%.2f expected=%.2f for %s", calculatedLogL, expectedLogL, cfg.toString()));
|
||||
Assert.assertEquals(calculatedLogL, expectedLogL, cfg.tolerance());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMismatchInEveryPositionInTheReadWithCenteredHaplotype() {
|
||||
byte[] haplotype1 = "TTCTCTTCTGTTGTGGCTGGTT".getBytes();
|
||||
|
||||
final int offset = 2;
|
||||
byte[] gop = new byte[haplotype1.length - 2 * offset];
|
||||
Arrays.fill(gop, (byte) 80);
|
||||
byte[] gcp = new byte[haplotype1.length - 2 * offset];
|
||||
Arrays.fill(gcp, (byte) 80);
|
||||
|
||||
for( int k = 0; k < haplotype1.length - 2 * offset; k++ ) {
|
||||
byte[] quals = new byte[haplotype1.length - 2 * offset];
|
||||
Arrays.fill(quals, (byte) 90);
|
||||
// one read mismatches the haplotype
|
||||
quals[k] = 20;
|
||||
|
||||
byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length-offset);
|
||||
// change single base at position k to C. If it's a C, change to T
|
||||
mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C');
|
||||
double res1 = hmm.computeReadLikelihoodGivenHaplotype(
|
||||
haplotype1, mread,
|
||||
quals, gop, gop,
|
||||
gcp);
|
||||
|
||||
|
||||
System.out.format("H:%s\nR: %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);
|
||||
|
||||
Assert.assertEquals(res1, -2.0, 1e-2);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMismatchInEveryPositionInTheRead() {
|
||||
byte[] haplotype1 = "TTCTCTTCTGTTGTGGCTGGTT".getBytes();
|
||||
|
||||
final int offset = 2;
|
||||
byte[] gop = new byte[haplotype1.length - offset];
|
||||
Arrays.fill(gop, (byte) 80);
|
||||
byte[] gcp = new byte[haplotype1.length - offset];
|
||||
Arrays.fill(gcp, (byte) 80);
|
||||
|
||||
for( int k = 0; k < haplotype1.length - offset; k++ ) {
|
||||
byte[] quals = new byte[haplotype1.length - offset];
|
||||
Arrays.fill(quals, (byte) 90);
|
||||
// one read mismatches the haplotype
|
||||
quals[k] = 20;
|
||||
|
||||
byte[] mread = Arrays.copyOfRange(haplotype1,offset,haplotype1.length);
|
||||
// change single base at position k to C. If it's a C, change to T
|
||||
mread[k] = ( mread[k] == (byte)'C' ? (byte)'T' : (byte)'C');
|
||||
double res1 = hmm.computeReadLikelihoodGivenHaplotype(
|
||||
haplotype1, mread,
|
||||
quals, gop, gop,
|
||||
gcp);
|
||||
|
||||
|
||||
System.out.format("H:%s\nR: %s\n Pos:%d Result:%4.2f\n",new String(haplotype1), new String(mread), k,res1);
|
||||
|
||||
Assert.assertEquals(res1, -2.0, 1e-2);
|
||||
}
|
||||
}
|
||||
}
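
The expected value of -2.0 in the two new mismatch tests follows from the quality model: the gap penalties are set to Q80, so indel paths are negligible; the Q90 matching bases contribute essentially 0; and the single mismatching base carries base quality 20, so the read likelihood is approximately

    log10 P(read | haplotype) ≈ log10(10^(-20/10)) = -2.0

which is why the assertions use a 1e-2 tolerance.
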
|
||||