Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-04-19 19:40:03 -04:00
commit d2488dfb81
10 changed files with 86 additions and 46 deletions

View File

@ -125,8 +125,8 @@ public class ContextCovariate implements StandardCovariate {
*/
private BitSet contextWith(byte[] bases, int offset, int contextSize) {
BitSet result = null;
if (offset >= contextSize) {
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
if (offset - contextSize + 1 >= 0) {
String context = new String(Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1));
if (!context.contains("N"))
result = BitSetUtils.bitSetFrom(context);
}

View File

@ -76,7 +76,7 @@ public class Datum {
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(empiricalQual, (double) QualityUtils.MAX_QUAL_SCORE);
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
byte empiricalQualByte() {

View File

@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.Pair;
@ -152,7 +151,7 @@ public class RecalDataManager {
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
for (Covariate covariate : requiredCovariates) {
requiredCovariatesToAdd.add(covariate);
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively)
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(); // initializing a new recal table for each required covariate (cumulatively)
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
}

View File

@ -74,7 +74,7 @@ public class RecalDatum extends Datum {
}
public final void calcCombinedEmpiricalQuality() {
this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
}
public final void calcEstimatedReportedQuality() {
@ -102,7 +102,7 @@ public class RecalDatum extends Datum {
@Override
public String toString() {
return String.format("%d,%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()), (byte) Math.floor(getEstimatedQReported()));
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
}

View File

@ -20,10 +20,8 @@ import java.util.*;
public class AlleleCount extends VariantStratifier {
@Override
public void initialize() {
List<RodBinding<VariantContext>> evals = getVariantEvalWalker().getEvals();
// we can only work with a single eval VCF, and it must have genotypes
if ( evals.size() != 1 )
if ( getVariantEvalWalker().getEvals().size() != 1 && !getVariantEvalWalker().mergeEvals )
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification only works with a single eval vcf");
// There are 2 x n sample chromosomes for diploids

View File

@ -9,6 +9,7 @@ import net.sf.samtools.SAMUtils;
* @author Kiran Garimella
*/
public class QualityUtils {
public final static byte MAX_RECALIBRATED_Q_SCORE = 50;
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);

View File

@ -68,9 +68,9 @@ public class BaseRecalibration {
/**
* This constructor only exists for testing purposes.
*
* @param quantizationInfo
* @param keysAndTablesMap
* @param requestedCovariates
* @param quantizationInfo the quantization info object
* @param keysAndTablesMap the map of key managers and recalibration tables
* @param requestedCovariates the list of requested covariates
*/
protected BaseRecalibration(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, ArrayList<Covariate> requestedCovariates) {
this.quantizationInfo = quantizationInfo;
@ -179,9 +179,8 @@ public class BaseRecalibration {
}
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_QUAL_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
}

View File

@ -39,8 +39,8 @@ public class ContextCovariateUnitTest {
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
for (int i = 0; i < values.length; i++) {
String expectedContext = null;
if (i >= contextSize) {
String context = bases.substring(i - contextSize, i);
if (i - contextSize + 1 >= 0) {
String context = bases.substring(i - contextSize + 1, i + 1);
if (!context.contains("N"))
expectedContext = context;
}

View File

@ -34,6 +34,8 @@ public class VariantEvalIntegrationTest extends WalkerTest {
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval";
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
private static String fundamentalTestSNPsSplit1of2VCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.split_1_of_2.vcf";
private static String fundamentalTestSNPsSplit2of2VCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.split_2_of_2.vcf";
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf";
private static String cmdRoot = "-T VariantEval" +
@ -437,24 +439,69 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testAlleleCountStrat() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
"--eval " + fundamentalTestSNPsVCF,
"-noEV",
"-EV CountVariants",
"-noST",
"-ST AlleleCount",
"-L " + fundamentalTestSNPsVCF,
"-o %s"
),
1,
Arrays.asList("1198bfea6183bd43219071a84c79a386")
);
buildCommandLine(
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
"--eval " + fundamentalTestSNPsVCF,
"-noEV",
"-EV CountVariants",
"-noST",
"-ST AlleleCount",
"-L " + fundamentalTestSNPsVCF,
"-o %s"
),
1,
Arrays.asList("1198bfea6183bd43219071a84c79a386")
);
executeTest("testAlleleCountStrat", spec);
}
@Test
public void testMultipleEvalTracksAlleleCountWithMerge() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
"--eval " + fundamentalTestSNPsSplit1of2VCF,
"--eval " + fundamentalTestSNPsSplit2of2VCF,
"--mergeEvals",
"-noEV",
"-EV CountVariants",
"-noST",
"-ST AlleleCount",
"-L " + fundamentalTestSNPsVCF,
"-o %s"
),
1,
Arrays.asList("1198bfea6183bd43219071a84c79a386")
);
executeTest("testMultipleEvalTracksAlleleCountWithMerge", spec);
}
@Test
public void testMultipleEvalTracksAlleleCountWithoutMerge() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
"--eval " + fundamentalTestSNPsSplit1of2VCF,
"--eval " + fundamentalTestSNPsSplit2of2VCF,
//"--mergeEvals", No merge with AC strat ==> error
"-noEV",
"-EV CountVariants",
"-noST",
"-ST AlleleCount",
"-L " + fundamentalTestSNPsVCF
),
0,
UserException.class
);
executeTest("testMultipleEvalTracksAlleleCountWithoutMerge", spec);
}
@Test
public void testIntervalStrat() {
WalkerTestSpec spec = new WalkerTestSpec(

View File

@ -24,10 +24,6 @@ public class BaseRecalibrationUnitTest {
private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager;
private LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap;
private BQSRKeyManager rgKeyManager;
private BQSRKeyManager qsKeyManager;
private BQSRKeyManager cvKeyManager;
private ReadGroupCovariate rgCovariate;
private QualityScoreCovariate qsCovariate;
private ContextCovariate cxCovariate;
@ -60,13 +56,13 @@ public class BaseRecalibrationUnitTest {
rgCovariate = new ReadGroupCovariate();
rgCovariate.initialize(RAC);
requiredCovariates.add(rgCovariate);
rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
BQSRKeyManager rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
keysAndTablesMap.put(rgKeyManager, new HashMap<BitSet, RecalDatum>());
qsCovariate = new QualityScoreCovariate();
qsCovariate.initialize(RAC);
requiredCovariates.add(qsCovariate);
qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
BQSRKeyManager qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
keysAndTablesMap.put(qsKeyManager, new HashMap<BitSet, RecalDatum>());
cxCovariate = new ContextCovariate();
@ -75,7 +71,7 @@ public class BaseRecalibrationUnitTest {
cyCovariate = new CycleCovariate();
cyCovariate.initialize(RAC);
optionalCovariates.add(cyCovariate);
cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
BQSRKeyManager cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
keysAndTablesMap.put(cvKeyManager, new HashMap<BitSet, RecalDatum>());
@ -108,7 +104,7 @@ public class BaseRecalibrationUnitTest {
updateCovariateWithKeySet(mapEntry.getValue(), key, newDatum);
}
}
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_QUAL_SCORE);
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);
List<Byte> quantizedQuals = new ArrayList<Byte>();
List<Long> qualCounts = new ArrayList<Long>();
@ -179,7 +175,7 @@ public class BaseRecalibrationUnitTest {
BitSet key = entry.getKey();
RecalDatum datum = entry.getValue();
List<Object> keySet = keyManager.keySetFrom(key);
System.out.println(String.format("%s => %s", Utils.join(",", keySet), datum));
System.out.println(String.format("%s => %s", Utils.join(",", keySet), datum) + "," + datum.getEstimatedQReported());
}
System.out.println();
}
@ -187,9 +183,9 @@ public class BaseRecalibrationUnitTest {
}
private static void printNestedHashMap(Map<Object,Object> table, String output) {
private static void printNestedHashMap(Map table, String output) {
for (Object key : table.keySet()) {
String ret = "";
String ret;
if (output.isEmpty())
ret = "" + key;
else
@ -199,7 +195,7 @@ public class BaseRecalibrationUnitTest {
if (next instanceof org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum)
System.out.println(ret + " => " + next);
else
printNestedHashMap((Map<Object, Object>) next, "" + ret);
printNestedHashMap((Map) next, "" + ret);
}
}
@ -269,7 +265,7 @@ public class BaseRecalibrationUnitTest {
}
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_QUAL_SCORE);
return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_RECALIBRATED_Q_SCORE);
// Verbose printouts used to validate with old recalibrator
//if(key.contains(null)) {
@ -289,6 +285,6 @@ public class BaseRecalibrationUnitTest {
final double doubleMismatches = (double) (errors + smoothing);
final double doubleObservations = (double) ( observations + smoothing );
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(QualityUtils.MAX_QUAL_SCORE, empiricalQual);
return Math.min(QualityUtils.MAX_RECALIBRATED_Q_SCORE, empiricalQual);
}
}