add the CEUtrio best practices results (UG + PBT) to the bundle
This commit is contained in:
parent
90b9971033
commit
dde3060bb8
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -7,73 +8,125 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created with IntelliJ IDEA.
|
* Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of
|
||||||
* User: ami
|
* the reduced bam are above sufficient threshold)
|
||||||
* Date: 10/19/12
|
*
|
||||||
* Time: 9:09 AM
|
* <h2>Input</h2>
|
||||||
* To change this template use File | Settings | File Templates.
|
* <p>
|
||||||
|
* The original and reduced BAM files.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -I:original original.bam \
|
||||||
|
* -I:reduced reduced.bam \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T AssessReducedQuals \
|
||||||
|
* -o output.intervals
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author ami
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
|
public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
|
||||||
|
|
||||||
private static final String original = "original";
|
|
||||||
private static final String reduced = "reduced";
|
private static final String reduced = "reduced";
|
||||||
|
private static final String original = "original";
|
||||||
|
private static final int originalQualsIndex = 0;
|
||||||
|
private static final int reducedQualsIndex = 1;
|
||||||
|
|
||||||
|
@Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
|
||||||
|
public int sufficientQualSum = 600;
|
||||||
|
|
||||||
|
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
|
||||||
|
public int qual_epsilon = 0;
|
||||||
|
|
||||||
|
@Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on")
|
||||||
|
public int debugLevel = 0;
|
||||||
|
|
||||||
@Output
|
@Output
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
if (debugLevel != 0)
|
||||||
|
out.println(" Debug mode" +
|
||||||
|
"Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
|
||||||
|
"Debug:\tqual_epsilon: "+qual_epsilon);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||||
|
|
||||||
public void initialize() {} //todo: why we need that?
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if ( tracker == null )
|
if ( tracker == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
int originalQualsIndex = 0;
|
boolean reportLocus;
|
||||||
int reducedQualsIndex = 1;
|
final int[] quals = getPileupQuals(context.getBasePileup());
|
||||||
double epsilon = 0;
|
if (debugLevel != 0)
|
||||||
double[] quals = getPileupQuals(context.getBasePileup());
|
out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
|
||||||
return (quals[originalQualsIndex] - quals[reducedQualsIndex] >= epsilon || quals[originalQualsIndex] - quals[reducedQualsIndex] <= -1*epsilon) ? ref.getLocus() : null;
|
final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
|
||||||
|
final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
|
||||||
|
final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
|
||||||
|
final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
|
||||||
|
reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
|
||||||
|
if(debugLevel != 0 && reportLocus)
|
||||||
|
out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
|
||||||
|
|
||||||
|
return reportLocus ? ref.getLocus() : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private double[] getPileupQuals(final ReadBackedPileup readPileup) {
|
private final int[] getPileupQuals(final ReadBackedPileup readPileup) {
|
||||||
|
|
||||||
int originalQualsIndex = 0;
|
final int[] quals = new int[2];
|
||||||
int reducedQualsIndex = 1;
|
String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
|
||||||
double[] quals = new double[2];
|
"Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
|
||||||
|
|
||||||
for( PileupElement p : readPileup){
|
for( PileupElement p : readPileup ){
|
||||||
if ( (int)p.getQual() > 2 && p.getMappingQual() > 0 && !p.isDeletion() ){
|
final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
|
||||||
if (p.getRead().isReducedRead()){
|
if ( isGoodRead(p,tags) ){
|
||||||
double tempQual = (double)(p.getQual()) * p.getRepresentativeCount();
|
final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
|
||||||
quals[reducedQualsIndex] += tempQual;
|
final int tagIndex = getTagIndex(tags);
|
||||||
}
|
quals[tagIndex] += tempQual;
|
||||||
else
|
if(debugLevel == 2)
|
||||||
{
|
printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
|
||||||
double tempQual = (double)(p.getQual());
|
|
||||||
quals[originalQualsIndex] += tempQual;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
if(debugLevel == 2){
|
||||||
|
out.println(printPileup[originalQualsIndex]);
|
||||||
|
out.println(printPileup[reducedQualsIndex]);
|
||||||
|
}
|
||||||
return quals;
|
return quals;
|
||||||
}
|
}
|
||||||
|
|
||||||
//public void onTraversalDone(GenomeLoc sum) {
|
private final boolean isGoodRead(PileupElement p, List<String> tags){
|
||||||
// if ( sum != null )
|
return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
|
||||||
// out.println(sum);
|
}
|
||||||
//}
|
|
||||||
|
private final int getTagIndex(List<String> tags){
|
||||||
|
return tags.contains(reduced) ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(GenomeLoc sum) {
|
||||||
|
if ( sum != null )
|
||||||
|
out.println(sum);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
|
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
|
||||||
|
|
|
||||||
|
|
@ -137,6 +137,13 @@ class GATKResourcesBundle extends QScript {
|
||||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
|
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
|
||||||
"Mills_and_1000G_gold_standard.indels", b37, true, false))
|
"Mills_and_1000G_gold_standard.indels", b37, true, false))
|
||||||
|
|
||||||
|
//
|
||||||
|
// CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT)
|
||||||
|
//
|
||||||
|
|
||||||
|
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf",
|
||||||
|
"CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false))
|
||||||
|
|
||||||
//
|
//
|
||||||
// example call set for wiki tutorial
|
// example call set for wiki tutorial
|
||||||
//
|
//
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue