Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
29273abab7
|
|
@ -198,8 +198,8 @@ public class GenomeAnalysisEngine {
|
|||
private BaseRecalibration baseRecalibration = null;
|
||||
public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
|
||||
public boolean hasBaseRecalibration() { return baseRecalibration != null; }
|
||||
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) {
|
||||
baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan);
|
||||
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean emitOriginalQuals) {
|
||||
baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan, emitOriginalQuals);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -239,7 +239,7 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
// if the use specified an input BQSR recalibration table then enable on the fly recalibration
|
||||
if (args.BQSR_RECAL_FILE != null)
|
||||
setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN);
|
||||
setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals);
|
||||
|
||||
// Determine how the threads should be divided between CPU vs. IO.
|
||||
determineThreadAllocation();
|
||||
|
|
|
|||
|
|
@ -220,6 +220,12 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName="disable_indel_quals", shortName = "DIQ", doc = "If true, disables printing of base insertion and base deletion tags (with -BQSR)", required=false)
|
||||
public boolean disableIndelQuals = false;
|
||||
|
||||
/**
|
||||
* By default, the OQ tag in not emitted when using the -BQSR argument.
|
||||
*/
|
||||
@Argument(fullName="emit_original_quals", shortName = "EOQ", doc = "If true, enables printing of the OQ tag with the original base qualities (with -BQSR)", required=false)
|
||||
public boolean emitOriginalQuals = false;
|
||||
|
||||
/**
|
||||
* Do not modify quality scores less than this value but rather just write them out unmodified in the recalibrated BAM file.
|
||||
* In general it's unsafe to change qualities scores below < 6, since base callers use these values to indicate random or bad bases.
|
||||
|
|
|
|||
|
|
@ -185,7 +185,7 @@ public class BaseQualityScoreRecalibrator extends LocusWalker<Long, Long> implem
|
|||
|
||||
Class c = null;
|
||||
for ( Class<? extends RecalibrationEngine> REclass : REclasses ) {
|
||||
if ( REclass.isAssignableFrom(ProtectedPackageSource.class) ) {
|
||||
if ( ProtectedPackageSource.class.isAssignableFrom(REclass) ) {
|
||||
c = REclass;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -342,9 +342,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
private Genotype getPhasedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, Genotype phasedGenotype){
|
||||
|
||||
int phredScoreTransmission = -1;
|
||||
if(transmissionProb != NO_TRANSMISSION_PROB)
|
||||
phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb));
|
||||
|
||||
if(transmissionProb != NO_TRANSMISSION_PROB){
|
||||
double dphredScoreTransmission = MathUtils.log10ProbabilityToPhredScale(Math.log10(1-(transmissionProb)));
|
||||
phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE;
|
||||
}
|
||||
//Handle null, missing and unavailable genotypes
|
||||
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
|
||||
//genotype so it is safe to return the original genotype in this case.
|
||||
|
|
@ -410,7 +411,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
buildMatrices();
|
||||
|
||||
if(mvFile != null)
|
||||
mvFile.println("#CHROM\tPOS\tFILTER\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_RAD\tMOTHER_AAD\tMOTHER_HRPL\tMOTHER_HETPL\tMOTHER_HAPL\tFATHER_GT\tFATHER_DP\tFATHER_RAD\tFATHER_AAD\tFATHER_HRPL\tFATHER_HETPL\tFATHER_HAPL\tCHILD_GT\tCHILD_DP\tCHILD_RAD\tCHILD_AAD\tCHILD_HRPL\tCHILD_HETPL\tCHILD_HAPL");
|
||||
mvFile.println("CHROM\tPOS\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_AD\tMOTHER_PL\tFATHER_GT\tFATHER_DP\tFATHER_AD\tFATHER_PL\tCHILD_GT\tCHILD_DP\tCHILD_AD\tCHILD_PL");
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -776,7 +777,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
return metricsCounters;
|
||||
|
||||
final VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation());
|
||||
if (vc == null)
|
||||
if (vc == null || !vc.isBiallelic())
|
||||
return metricsCounters;
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
|
|
@ -805,8 +806,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
if(father != null){
|
||||
genotypesContext.replace(phasedFather);
|
||||
updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s",
|
||||
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
|
||||
vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
|
||||
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),
|
||||
phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),Arrays.asList(phasedChild.getDP()),phasedChild.getAD(),phasedChild.getLikelihoodsString());
|
||||
|
|
@ -817,8 +818,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
updatePairMetricsCounters(phasedMother,phasedChild,mvCount,metricsCounters);
|
||||
if(!(phasedMother.getType()==mother.getType() && phasedChild.getType()==child.getType()))
|
||||
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t.:.:.:.\t%s:%s:%s:%s",
|
||||
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s:%s:%s:%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s",
|
||||
vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
|
||||
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),phasedMother.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
|
||||
}
|
||||
|
|
@ -828,15 +829,15 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
updatePairMetricsCounters(phasedFather,phasedChild,mvCount,metricsCounters);
|
||||
if(!(phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
|
||||
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t.:.:.:.\t%s:%s:%s:%s\t%s:%s:%s:%s",
|
||||
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
|
||||
vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
|
||||
phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
|
||||
}
|
||||
|
||||
//Report violation if set so
|
||||
//TODO: ADAPT FOR PAIRS TOO!!
|
||||
if(mvCount>0 && mvFile != null)
|
||||
if(mvCount>0 && mvFile != null && !vc.isFiltered())
|
||||
mvFile.println(mvfLine);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,11 +25,14 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMTag;
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -51,6 +54,7 @@ public class BaseRecalibration {
|
|||
|
||||
private final boolean disableIndelQuals;
|
||||
private final int preserveQLessThan;
|
||||
private final boolean emitOriginalQuals;
|
||||
|
||||
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||
static {
|
||||
|
|
@ -66,7 +70,7 @@ public class BaseRecalibration {
|
|||
* @param disableIndelQuals if true, do not emit base indel qualities
|
||||
* @param preserveQLessThan preserve quality scores less than this value
|
||||
*/
|
||||
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) {
|
||||
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean emitOriginalQuals) {
|
||||
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
|
||||
|
||||
recalibrationTables = recalibrationReport.getRecalibrationTables();
|
||||
|
|
@ -80,6 +84,7 @@ public class BaseRecalibration {
|
|||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
this.disableIndelQuals = disableIndelQuals;
|
||||
this.preserveQLessThan = preserveQLessThan;
|
||||
this.emitOriginalQuals = emitOriginalQuals;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -90,6 +95,14 @@ public class BaseRecalibration {
|
|||
* @param read the read to recalibrate
|
||||
*/
|
||||
public void recalibrateRead(final GATKSAMRecord read) {
|
||||
if (emitOriginalQuals && read.getAttribute(SAMTag.OQ.name()) == null) { // Save the old qualities if the tag isn't already taken in the read
|
||||
try {
|
||||
read.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(read.getBaseQualities()));
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new UserException.MalformedBAM(read, "illegal base quality encountered; " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("cd112ec37a9e28d366aff29a85fdcaa0","f8721f4f5d3bae2848ae15c3f120709b")
|
||||
Arrays.asList("f4b0b5471e03306ee2fad27d88b217b6","f8721f4f5d3bae2848ae15c3f120709b")
|
||||
);
|
||||
executeTest("testTrueNegativeMV", spec);
|
||||
}
|
||||
|
|
@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("27ccd6feb51de7e7dcdf35f4697fa4eb","547fdfef393f3045a96d245ef6af8acb")
|
||||
Arrays.asList("dbc64776dcc9e01a468b61e4e0db8277","547fdfef393f3045a96d245ef6af8acb")
|
||||
);
|
||||
executeTest("testTruePositiveMV", spec);
|
||||
}
|
||||
|
|
@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("719d681bb0a52a40bc854bba107c5c94","9529e2bf214d72e792d93fbea22a3b91")
|
||||
Arrays.asList("37793e78861bb0bc070884da67dc10e6","9529e2bf214d72e792d93fbea22a3b91")
|
||||
);
|
||||
executeTest("testFalsePositiveMV", spec);
|
||||
}
|
||||
|
|
@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("7f4a277aee2c7398fcfa84d6c98d5fb3","8c157d79dd00063d2932f0d2b96f53d8")
|
||||
Arrays.asList("e4da7639bb542d6440975da12b94973f","8c157d79dd00063d2932f0d2b96f53d8")
|
||||
);
|
||||
executeTest("testSpecialCases", spec);
|
||||
}
|
||||
|
|
@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("44e09d2f9e4d8a9488226d03a97fe999","343e418850ae4a687ebef2acd55fcb07")
|
||||
Arrays.asList("ab92b714471a000285577d540e1fdc2e","343e418850ae4a687ebef2acd55fcb07")
|
||||
);
|
||||
executeTest("testPriorOption", spec);
|
||||
}
|
||||
|
|
@ -149,7 +149,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-fatherAlleleFirst"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("60ced3d078792a150a03640b62926857","52ffa82428e63ade22ea37b72ae58492")
|
||||
Arrays.asList("4b937c1b4e96602a7479b07b59254d06","52ffa82428e63ade22ea37b72ae58492")
|
||||
);
|
||||
executeTest("testFatherAlleleFirst", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ class PacbioProcessingPipelineTest {
|
|||
" -blasr ",
|
||||
" -test ",
|
||||
" -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf").mkString
|
||||
spec.fileMD5s += testOut -> "cf147e7f56806598371f8d5d6794b852"
|
||||
spec.fileMD5s += testOut -> "2f2026882a2850bb14a858524158d5a8"
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue