diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java
index c8ec33364..e74684c53 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java
@@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.qc;
+import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@@ -7,73 +8,125 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream;
+import java.util.List;
/**
- * Created with IntelliJ IDEA.
- * User: ami
- * Date: 10/19/12
- * Time: 9:09 AM
- * To change this template use File | Settings | File Templates.
+ * Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of
+ * the reduced bam are above sufficient threshold)
+ *
+ *
Input
+ *
+ * The original and reduced BAM files.
+ *
+ *
+ * Output
+ *
+ * A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon.
+ *
+ *
+ * Examples
+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ * -I:original original.bam \
+ * -I:reduced reduced.bam \
+ * -R ref.fasta \
+ * -T AssessReducedQuals \
+ * -o output.intervals
+ *
+ *
+ * @author ami
*/
+
public class AssessReducedQuals extends LocusWalker implements TreeReducible {
- private static final String original = "original";
private static final String reduced = "reduced";
+ private static final String original = "original";
+ private static final int originalQualsIndex = 0;
+ private static final int reducedQualsIndex = 1;
+
+ @Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
+ public int sufficientQualSum = 600;
+
+ @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
+ public int qual_epsilon = 0;
+
+ @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on")
+ public int debugLevel = 0;
@Output
protected PrintStream out;
+ public void initialize() {
+ if (debugLevel != 0)
+ out.println(" Debug mode" +
+ "Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
+ "Debug:\tqual_epsilon: "+qual_epsilon);
+ }
+
@Override
public boolean includeReadsWithDeletionAtLoci() { return true; }
- public void initialize() {} //todo: why we need that?
-
@Override
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null )
return null;
- int originalQualsIndex = 0;
- int reducedQualsIndex = 1;
- double epsilon = 0;
- double[] quals = getPileupQuals(context.getBasePileup());
- return (quals[originalQualsIndex] - quals[reducedQualsIndex] >= epsilon || quals[originalQualsIndex] - quals[reducedQualsIndex] <= -1*epsilon) ? ref.getLocus() : null;
+ boolean reportLocus;
+ final int[] quals = getPileupQuals(context.getBasePileup());
+ if (debugLevel != 0)
+ out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
+ final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
+ final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
+ final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
+ final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
+ reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
+ if(debugLevel != 0 && reportLocus)
+ out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
+ return reportLocus ? ref.getLocus() : null;
}
- private double[] getPileupQuals(final ReadBackedPileup readPileup) {
+ private final int[] getPileupQuals(final ReadBackedPileup readPileup) {
- int originalQualsIndex = 0;
- int reducedQualsIndex = 1;
- double[] quals = new double[2];
+ final int[] quals = new int[2];
+ String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
+ "Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
- for( PileupElement p : readPileup){
- if ( (int)p.getQual() > 2 && p.getMappingQual() > 0 && !p.isDeletion() ){
- if (p.getRead().isReducedRead()){
- double tempQual = (double)(p.getQual()) * p.getRepresentativeCount();
- quals[reducedQualsIndex] += tempQual;
- }
- else
- {
- double tempQual = (double)(p.getQual());
- quals[originalQualsIndex] += tempQual;
- }
+ for( PileupElement p : readPileup ){
+ final List tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
+ if ( isGoodRead(p,tags) ){
+ final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
+ final int tagIndex = getTagIndex(tags);
+ quals[tagIndex] += tempQual;
+ if(debugLevel == 2)
+ printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
}
-
}
-
-
+ if(debugLevel == 2){
+ out.println(printPileup[originalQualsIndex]);
+ out.println(printPileup[reducedQualsIndex]);
+ }
return quals;
}
- //public void onTraversalDone(GenomeLoc sum) {
- // if ( sum != null )
- // out.println(sum);
- //}
+ private final boolean isGoodRead(PileupElement p, List tags){
+ return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
+ }
+
+ private final int getTagIndex(List tags){
+ return tags.contains(reduced) ? 1 : 0;
+ }
+
+ @Override
+ public void onTraversalDone(GenomeLoc sum) {
+ if ( sum != null )
+ out.println(sum);
+ }
@Override
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala
index 5e66520ca..9e8815762 100755
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala
@@ -136,7 +136,14 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
"Mills_and_1000G_gold_standard.indels", b37, true, false))
-
+
+ //
+ // CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT)
+ //
+
+ addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf",
+ "CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false))
+
//
// example call set for wiki tutorial
//