Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-07-17 16:58:12 -04:00
commit 29273abab7
7 changed files with 44 additions and 24 deletions

View File

@ -198,8 +198,8 @@ public class GenomeAnalysisEngine {
private BaseRecalibration baseRecalibration = null;
public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
public boolean hasBaseRecalibration() { return baseRecalibration != null; }
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) {
baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan);
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean emitOriginalQuals) {
baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan, emitOriginalQuals);
}
/**
@ -239,7 +239,7 @@ public class GenomeAnalysisEngine {
// if the use specified an input BQSR recalibration table then enable on the fly recalibration
if (args.BQSR_RECAL_FILE != null)
setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN);
setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals);
// Determine how the threads should be divided between CPU vs. IO.
determineThreadAllocation();

View File

@ -220,6 +220,12 @@ public class GATKArgumentCollection {
@Argument(fullName="disable_indel_quals", shortName = "DIQ", doc = "If true, disables printing of base insertion and base deletion tags (with -BQSR)", required=false)
public boolean disableIndelQuals = false;
/**
* By default, the OQ tag in not emitted when using the -BQSR argument.
*/
@Argument(fullName="emit_original_quals", shortName = "EOQ", doc = "If true, enables printing of the OQ tag with the original base qualities (with -BQSR)", required=false)
public boolean emitOriginalQuals = false;
/**
* Do not modify quality scores less than this value but rather just write them out unmodified in the recalibrated BAM file.
* In general it's unsafe to change qualities scores below < 6, since base callers use these values to indicate random or bad bases.

View File

@ -185,7 +185,7 @@ public class BaseQualityScoreRecalibrator extends LocusWalker<Long, Long> implem
Class c = null;
for ( Class<? extends RecalibrationEngine> REclass : REclasses ) {
if ( REclass.isAssignableFrom(ProtectedPackageSource.class) ) {
if ( ProtectedPackageSource.class.isAssignableFrom(REclass) ) {
c = REclass;
break;
}

View File

@ -342,9 +342,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
private Genotype getPhasedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, Genotype phasedGenotype){
int phredScoreTransmission = -1;
if(transmissionProb != NO_TRANSMISSION_PROB)
phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb));
if(transmissionProb != NO_TRANSMISSION_PROB){
double dphredScoreTransmission = MathUtils.log10ProbabilityToPhredScale(Math.log10(1-(transmissionProb)));
phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE;
}
//Handle null, missing and unavailable genotypes
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
//genotype so it is safe to return the original genotype in this case.
@ -410,7 +411,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
buildMatrices();
if(mvFile != null)
mvFile.println("#CHROM\tPOS\tFILTER\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_RAD\tMOTHER_AAD\tMOTHER_HRPL\tMOTHER_HETPL\tMOTHER_HAPL\tFATHER_GT\tFATHER_DP\tFATHER_RAD\tFATHER_AAD\tFATHER_HRPL\tFATHER_HETPL\tFATHER_HAPL\tCHILD_GT\tCHILD_DP\tCHILD_RAD\tCHILD_AAD\tCHILD_HRPL\tCHILD_HETPL\tCHILD_HAPL");
mvFile.println("CHROM\tPOS\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_AD\tMOTHER_PL\tFATHER_GT\tFATHER_DP\tFATHER_AD\tFATHER_PL\tCHILD_GT\tCHILD_DP\tCHILD_AD\tCHILD_PL");
}
@ -776,7 +777,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
return metricsCounters;
final VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation());
if (vc == null)
if (vc == null || !vc.isBiallelic())
return metricsCounters;
final VariantContextBuilder builder = new VariantContextBuilder(vc);
@ -805,8 +806,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
if(father != null){
genotypesContext.replace(phasedFather);
updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s",
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),
phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
phasedChild.getGenotypeString(),Arrays.asList(phasedChild.getDP()),phasedChild.getAD(),phasedChild.getLikelihoodsString());
@ -817,8 +818,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
updatePairMetricsCounters(phasedMother,phasedChild,mvCount,metricsCounters);
if(!(phasedMother.getType()==mother.getType() && phasedChild.getType()==child.getType()))
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t.:.:.:.\t%s:%s:%s:%s",
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s:%s:%s:%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s",
vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),phasedMother.getLikelihoodsString(),
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
}
@ -828,15 +829,15 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
updatePairMetricsCounters(phasedFather,phasedChild,mvCount,metricsCounters);
if(!(phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t.:.:.:.\t%s:%s:%s:%s\t%s:%s:%s:%s",
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
}
//Report violation if set so
//TODO: ADAPT FOR PAIRS TOO!!
if(mvCount>0 && mvFile != null)
if(mvCount>0 && mvFile != null && !vc.isFiltered())
mvFile.println(mvfLine);
}

View File

@ -25,11 +25,14 @@
package org.broadinstitute.sting.utils.recalibration;
import net.sf.samtools.SAMTag;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
@ -51,6 +54,7 @@ public class BaseRecalibration {
private final boolean disableIndelQuals;
private final int preserveQLessThan;
private final boolean emitOriginalQuals;
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
static {
@ -66,7 +70,7 @@ public class BaseRecalibration {
* @param disableIndelQuals if true, do not emit base indel qualities
* @param preserveQLessThan preserve quality scores less than this value
*/
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) {
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean emitOriginalQuals) {
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
recalibrationTables = recalibrationReport.getRecalibrationTables();
@ -80,6 +84,7 @@ public class BaseRecalibration {
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
this.disableIndelQuals = disableIndelQuals;
this.preserveQLessThan = preserveQLessThan;
this.emitOriginalQuals = emitOriginalQuals;
}
/**
@ -90,6 +95,14 @@ public class BaseRecalibration {
* @param read the read to recalibrate
*/
public void recalibrateRead(final GATKSAMRecord read) {
if (emitOriginalQuals && read.getAttribute(SAMTag.OQ.name()) == null) { // Save the old qualities if the tag isn't already taken in the read
try {
read.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(read.getBaseQualities()));
} catch (IllegalArgumentException e) {
throw new UserException.MalformedBAM(read, "illegal base quality encountered; " + e.getMessage());
}
}
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {

View File

@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("cd112ec37a9e28d366aff29a85fdcaa0","f8721f4f5d3bae2848ae15c3f120709b")
Arrays.asList("f4b0b5471e03306ee2fad27d88b217b6","f8721f4f5d3bae2848ae15c3f120709b")
);
executeTest("testTrueNegativeMV", spec);
}
@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("27ccd6feb51de7e7dcdf35f4697fa4eb","547fdfef393f3045a96d245ef6af8acb")
Arrays.asList("dbc64776dcc9e01a468b61e4e0db8277","547fdfef393f3045a96d245ef6af8acb")
);
executeTest("testTruePositiveMV", spec);
}
@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("719d681bb0a52a40bc854bba107c5c94","9529e2bf214d72e792d93fbea22a3b91")
Arrays.asList("37793e78861bb0bc070884da67dc10e6","9529e2bf214d72e792d93fbea22a3b91")
);
executeTest("testFalsePositiveMV", spec);
}
@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("7f4a277aee2c7398fcfa84d6c98d5fb3","8c157d79dd00063d2932f0d2b96f53d8")
Arrays.asList("e4da7639bb542d6440975da12b94973f","8c157d79dd00063d2932f0d2b96f53d8")
);
executeTest("testSpecialCases", spec);
}
@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("44e09d2f9e4d8a9488226d03a97fe999","343e418850ae4a687ebef2acd55fcb07")
Arrays.asList("ab92b714471a000285577d540e1fdc2e","343e418850ae4a687ebef2acd55fcb07")
);
executeTest("testPriorOption", spec);
}
@ -149,7 +149,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-fatherAlleleFirst"
),
2,
Arrays.asList("60ced3d078792a150a03640b62926857","52ffa82428e63ade22ea37b72ae58492")
Arrays.asList("4b937c1b4e96602a7479b07b59254d06","52ffa82428e63ade22ea37b72ae58492")
);
executeTest("testFatherAlleleFirst", spec);
}

View File

@ -40,7 +40,7 @@ class PacbioProcessingPipelineTest {
" -blasr ",
" -test ",
" -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf").mkString
spec.fileMD5s += testOut -> "cf147e7f56806598371f8d5d6794b852"
spec.fileMD5s += testOut -> "2f2026882a2850bb14a858524158d5a8"
PipelineTest.executeTest(spec)
}
}