add the CEUtrio best practices results (UG + PBT) to the bundle

This commit is contained in:
Ami Levy Moonshine 2012-10-25 15:36:17 -04:00
parent 90b9971033
commit dde3060bb8
2 changed files with 96 additions and 36 deletions

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.qc; package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -7,73 +8,125 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.List;
/** /**
* Created with IntelliJ IDEA. * Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of
* User: ami * the reduced bam are above sufficient threshold)
* Date: 10/19/12 *
* Time: 9:09 AM * <h2>Input</h2>
* To change this template use File | Settings | File Templates. * <p>
* The original and reduced BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -I:original original.bam \
* -I:reduced reduced.bam \
* -R ref.fasta \
* -T AssessReducedQuals \
* -o output.intervals
* </pre>
*
* @author ami
*/ */
public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> { public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
private static final String original = "original";
private static final String reduced = "reduced"; private static final String reduced = "reduced";
private static final String original = "original";
private static final int originalQualsIndex = 0;
private static final int reducedQualsIndex = 1;
@Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
public int sufficientQualSum = 600;
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
public int qual_epsilon = 0;
@Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on")
public int debugLevel = 0;
@Output @Output
protected PrintStream out; protected PrintStream out;
public void initialize() {
if (debugLevel != 0)
out.println(" Debug mode" +
"Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
"Debug:\tqual_epsilon: "+qual_epsilon);
}
@Override @Override
public boolean includeReadsWithDeletionAtLoci() { return true; } public boolean includeReadsWithDeletionAtLoci() { return true; }
public void initialize() {} //todo: why we need that?
@Override @Override
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null ) if ( tracker == null )
return null; return null;
int originalQualsIndex = 0; boolean reportLocus;
int reducedQualsIndex = 1; final int[] quals = getPileupQuals(context.getBasePileup());
double epsilon = 0; if (debugLevel != 0)
double[] quals = getPileupQuals(context.getBasePileup()); out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
return (quals[originalQualsIndex] - quals[reducedQualsIndex] >= epsilon || quals[originalQualsIndex] - quals[reducedQualsIndex] <= -1*epsilon) ? ref.getLocus() : null; final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
if(debugLevel != 0 && reportLocus)
out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
return reportLocus ? ref.getLocus() : null;
} }
private double[] getPileupQuals(final ReadBackedPileup readPileup) { private final int[] getPileupQuals(final ReadBackedPileup readPileup) {
int originalQualsIndex = 0; final int[] quals = new int[2];
int reducedQualsIndex = 1; String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
double[] quals = new double[2]; "Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
for( PileupElement p : readPileup){ for( PileupElement p : readPileup ){
if ( (int)p.getQual() > 2 && p.getMappingQual() > 0 && !p.isDeletion() ){ final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
if (p.getRead().isReducedRead()){ if ( isGoodRead(p,tags) ){
double tempQual = (double)(p.getQual()) * p.getRepresentativeCount(); final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
quals[reducedQualsIndex] += tempQual; final int tagIndex = getTagIndex(tags);
} quals[tagIndex] += tempQual;
else if(debugLevel == 2)
{ printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
double tempQual = (double)(p.getQual());
quals[originalQualsIndex] += tempQual;
}
} }
} }
if(debugLevel == 2){
out.println(printPileup[originalQualsIndex]);
out.println(printPileup[reducedQualsIndex]);
}
return quals; return quals;
} }
//public void onTraversalDone(GenomeLoc sum) { private final boolean isGoodRead(PileupElement p, List<String> tags){
// if ( sum != null ) return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
// out.println(sum); }
//}
private final int getTagIndex(List<String> tags){
return tags.contains(reduced) ? 1 : 0;
}
@Override
public void onTraversalDone(GenomeLoc sum) {
if ( sum != null )
out.println(sum);
}
@Override @Override
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) { public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {

View File

@ -137,6 +137,13 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf", addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
"Mills_and_1000G_gold_standard.indels", b37, true, false)) "Mills_and_1000G_gold_standard.indels", b37, true, false))
//
// CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT)
//
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf",
"CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false))
// //
// example call set for wiki tutorial // example call set for wiki tutorial
// //