Modifications:
QualityUtils - Stole the BaseUtils code for flipping reads around and applied it to quality scores SecondBaseSkew - Nothing's really different, just a commented line Additions (experimental annotations for future development of second-base annotation) ** I DO NOT INTEND FOR ANYONE TO USE THESE ** - ProportionOfNonrefBasesSupportingSNP - ProportionOfSNPSecondBasesSupportingRef - ProportionOfRefSecondBasesSupportingSNP + I hope these are self-explanatory - QualityAdjustedSecondBaseLod + Adjust lod-score by 10*log10[P[second bases are as observed]] Added walker: QualityScoreByStrand - oneoff project that's being saved if i ever need it git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2527 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
eb899741e1
commit
a32245f7d2
|
|
@ -0,0 +1,64 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Dec 17, 2009
|
||||
* Time: 2:48:15 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ProportionOfNonrefBasesSupportingSNP implements VariantAnnotation{
|
||||
private String KEY_NAME = "prop_nonref_that_are_snp";
|
||||
|
||||
public String getKeyName() { return KEY_NAME; }
|
||||
|
||||
public VCFInfoHeaderLine getDescription() {
|
||||
return new VCFInfoHeaderLine(KEY_NAME,
|
||||
1,VCFInfoHeaderLine.INFO_TYPE.Float,"Simple proportion of non-reference bases that are the SNP base");
|
||||
}
|
||||
|
||||
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, Variation var) {
|
||||
if ( ! var.isSNP() || ! var.isBiallelic() ) {
|
||||
return null;
|
||||
} else {
|
||||
Pair<Integer,Integer> totalNonref_totalSNP = new Pair<Integer,Integer>(0,0);
|
||||
for ( String sample : context.keySet() ) {
|
||||
ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
|
||||
totalNonref_totalSNP = getNonrefAndSNP(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalNonref_totalSNP);
|
||||
|
||||
}
|
||||
double p = getProportionOfNonrefBasesThatAreSNP(totalNonref_totalSNP);
|
||||
return String.format("%f", p );
|
||||
}
|
||||
}
|
||||
|
||||
private Pair<Integer,Integer> getNonrefAndSNP(ReadBackedPileup p, char ref, char snp, Pair<Integer,Integer> totals) {
|
||||
int[] counts = p.getBaseCounts();
|
||||
int nonrefCounts = 0;
|
||||
int snpCounts = counts[BaseUtils.simpleBaseToBaseIndex(snp)];
|
||||
for ( char c : BaseUtils.BASES ) {
|
||||
if ( ! BaseUtils.basesAreEqual((byte) c, (byte) ref) ) {
|
||||
nonrefCounts += counts[BaseUtils.simpleBaseToBaseIndex(c)];
|
||||
}
|
||||
}
|
||||
|
||||
totals.first+=nonrefCounts;
|
||||
totals.second+=snpCounts;
|
||||
return totals;
|
||||
}
|
||||
|
||||
private double getProportionOfNonrefBasesThatAreSNP( Pair<Integer,Integer> totalNonref_totalSNP ) {
|
||||
return ( 1.0 + totalNonref_totalSNP.second ) / (1.0 + totalNonref_totalSNP.first );
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.FileOutputStream;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Dec 17, 2009
|
||||
* Time: 2:18:43 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ProportionOfRefSecondBasesSupportingSNP implements VariantAnnotation{
|
||||
private String KEY_NAME = "ref_2bb_snp_prop";
|
||||
private boolean USE_MAPQ0_READS = false;
|
||||
|
||||
public String getKeyName() { return KEY_NAME; }
|
||||
|
||||
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, Variation var) {
|
||||
if ( ! var.isSNP() || ! var.isBiallelic() ) {
|
||||
return null;
|
||||
} else {
|
||||
Pair<Integer,Integer> totalAndSNPSupporting = new Pair<Integer,Integer>(0,0);
|
||||
for ( String sample : context.keySet() ) {
|
||||
ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
|
||||
totalAndSNPSupporting = getTotalRefAndSNPSupportCounts(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalAndSNPSupporting);
|
||||
|
||||
}
|
||||
double p = getProportionOfRefSecondaryBasesSupportingSNP(totalAndSNPSupporting);
|
||||
return String.format("%f", p );
|
||||
}
|
||||
}
|
||||
|
||||
private double getProportionOfRefSecondaryBasesSupportingSNP(Pair<Integer,Integer> tRef_snpSupport) {
|
||||
return ( 1.0 + tRef_snpSupport.second) / (1.0 + tRef_snpSupport.first );
|
||||
}
|
||||
|
||||
private Pair<Integer,Integer> getTotalRefAndSNPSupportCounts(ReadBackedPileup p, char ref, char snp, Pair<Integer,Integer> refAndSNPCounts) {
|
||||
int nRefBases = 0;
|
||||
int nSecondBasesSupportingSNP = 0;
|
||||
for (PileupElement e : p ) {
|
||||
if ( BaseUtils.basesAreEqual( e.getBase(), (byte) ref ) ) {
|
||||
if ( BaseUtils.isRegularBase(e.getSecondBase()) ) {
|
||||
nRefBases++;
|
||||
if ( BaseUtils.basesAreEqual( e.getSecondBase(), (byte) snp ) ) {
|
||||
nSecondBasesSupportingSNP++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
refAndSNPCounts.first+=nRefBases;
|
||||
refAndSNPCounts.second+=nSecondBasesSupportingSNP;
|
||||
return refAndSNPCounts;
|
||||
}
|
||||
|
||||
public VCFInfoHeaderLine getDescription() {
|
||||
return new VCFInfoHeaderLine(KEY_NAME,
|
||||
1,VCFInfoHeaderLine.INFO_TYPE.Float,"Simple proportion of second best base calls for reference base that support the SNP base");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Dec 17, 2009
|
||||
* Time: 2:42:05 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ProportionOfSNPSecondBasesSupportingRef implements VariantAnnotation{
|
||||
public String KEY_NAME = "SNP_2B_SUPPORT_REF";
|
||||
public boolean USE_MAPQ0_READS = false;
|
||||
public String debug_file = "/humgen/gsa-scr1/chartl/temporary/ProportionOfRefSecondBasesSupportingSNP.debug.txt";
|
||||
|
||||
public String getKeyName() { return KEY_NAME; }
|
||||
|
||||
public boolean useZeroQualityReads() { return USE_MAPQ0_READS; }
|
||||
|
||||
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, Variation var) {
|
||||
if ( ! var.isSNP() || ! var.isBiallelic() ) {
|
||||
return null;
|
||||
} else {
|
||||
Pair<Integer,Integer> totalAndSNPSupporting = new Pair<Integer,Integer>(0,0);
|
||||
for ( String sample : context.keySet() ) {
|
||||
ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
|
||||
totalAndSNPSupporting = getTotalSNPandRefSupporting(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalAndSNPSupporting);
|
||||
|
||||
}
|
||||
double p = getProportionOfSNPSecondaryBasesSupportingRef(totalAndSNPSupporting);
|
||||
return String.format("%f", p );
|
||||
}
|
||||
}
|
||||
|
||||
public double getProportionOfSNPSecondaryBasesSupportingRef(Pair<Integer,Integer> tSNP_refSupport) {
|
||||
return ( 1.0 + tSNP_refSupport.second) / (1.0 + tSNP_refSupport.first );
|
||||
}
|
||||
|
||||
public Pair<Integer,Integer> getTotalSNPandRefSupporting(ReadBackedPileup p, char ref, char snp, Pair<Integer,Integer> SNPandRefCounts) {
|
||||
int nSNPBases = 0;
|
||||
int nSNPBasesSupportingRef = 0;
|
||||
for (PileupElement e : p ) {
|
||||
if ( BaseUtils.basesAreEqual( e.getBase(), (byte) snp ) ) {
|
||||
if ( hasSecondBase(e) ) {
|
||||
nSNPBases++;
|
||||
if ( BaseUtils.basesAreEqual( e.getSecondBase(), (byte) ref ) ) {
|
||||
nSNPBasesSupportingRef++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SNPandRefCounts.first+=nSNPBases;
|
||||
SNPandRefCounts.second+=nSNPBasesSupportingRef;
|
||||
return SNPandRefCounts;
|
||||
}
|
||||
|
||||
|
||||
public boolean hasSecondBase(PileupElement e) {
|
||||
return BaseUtils.isRegularBase(e.getSecondBase());
|
||||
}
|
||||
|
||||
public VCFInfoHeaderLine getDescription() {
|
||||
return new VCFInfoHeaderLine(KEY_NAME,
|
||||
1,VCFInfoHeaderLine.INFO_TYPE.Float,"Simple proportion of second best base calls for SNP base that support the Ref base");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: Ghost
|
||||
* Date: Dec 19, 2009
|
||||
* Time: 1:02:09 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class QualityAdjustedSecondBaseLod implements VariantAnnotation {
|
||||
private final String KEY_NAME = "Qual_Adjusted_2blod";
|
||||
private final double CHI_LOD_MAX = -1000.0;
|
||||
private final SecondBaseSkew skewCalc = new SecondBaseSkew();
|
||||
private final double log10e = Math.log10(Math.E);
|
||||
private final double log10half = Math.log10(1.0/2);
|
||||
|
||||
public String getKeyName() { return KEY_NAME; }
|
||||
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(KEY_NAME, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Adjusted residual quality based on second-base skew"); }
|
||||
|
||||
public String annotate( RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> contexts, Variation variant) {
|
||||
String chi = skewCalc.annotate(tracker, ref,contexts,variant);
|
||||
if ( chi == null )
|
||||
return null;
|
||||
double chi_square = Double.valueOf(chi);
|
||||
double chi_loglik = chi_square <= 0.0 ? 0.0 : Math.max(-(chi_square/2.0)*log10e + log10half,CHI_LOD_MAX); // cap it...
|
||||
return String.format("%f", 10*(variant.getNegLog10PError() + chi_loglik));
|
||||
}
|
||||
}
|
||||
|
|
@ -21,9 +21,9 @@ import java.util.Map;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SecondBaseSkew implements VariantAnnotation {
|
||||
private final static double epsilon = Math.pow(10.0,-12);
|
||||
private final static double epsilon = Math.pow(10.0,-12.0);
|
||||
private final static String KEY_NAME = "2b_Chi";
|
||||
private final static double[] UNIFORM_ON_OFF_RATIO = {1.0/3, 2.0/3};
|
||||
private final static double[] UNIFORM_ON_OFF_RATIO = {1.0/3.0, 2.0/3.0};
|
||||
private double[] proportionExpectations = UNIFORM_ON_OFF_RATIO;
|
||||
|
||||
public String getKeyName() { return KEY_NAME; }
|
||||
|
|
@ -34,11 +34,12 @@ public class SecondBaseSkew implements VariantAnnotation {
|
|||
if ( !variation.isBiallelic() || !variation.isSNP() )
|
||||
return null;
|
||||
|
||||
char snp = variation.getAlternativeBaseForSNP();
|
||||
char alternate = variation.getAlternativeBaseForSNP();
|
||||
|
||||
Pair<Integer, Integer> depth = new Pair<Integer, Integer>(0, 0);
|
||||
for ( String sample : stratifiedContexts.keySet() ) {
|
||||
Pair<Integer, Integer> sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(), stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(), snp);
|
||||
//Pair<Integer,Integer> sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(),stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(), alternate);
|
||||
Pair<Integer, Integer> sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(), stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(), alternate);
|
||||
depth.first += sampleDepth.first;
|
||||
depth.second += sampleDepth.second;
|
||||
}
|
||||
|
|
@ -47,12 +48,8 @@ public class SecondBaseSkew implements VariantAnnotation {
|
|||
return null;
|
||||
|
||||
double biasedProportion = (1.0 + depth.second) / (1.0 + depth.first);
|
||||
|
||||
//System.out.printf("%d / %f%n", depthProp.getFirst(), depthProp.getSecond());
|
||||
double p_transformed = transform(biasedProportion, depth.first+1);
|
||||
double expected_transformed = transform(proportionExpectations[0], depth.first+1);
|
||||
// System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst());
|
||||
// System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]);
|
||||
double chi_square = Math.signum(biasedProportion - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE);
|
||||
return String.format("%f", chi_square);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,230 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.playground.utils.PoolUtils;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.poolseq.ReadOffsetQuad;
|
||||
import sun.misc.Cache;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.io.PrintStream;
|
||||
import java.io.PrintWriter;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: Ghost
|
||||
* Date: Dec 15, 2009
|
||||
* Time: 11:56:22 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
/*
|
||||
* This walker prints out quality score counts for forward and reverse stranded reads aggregated over all loci
|
||||
* in the interval. Furthermore, it prints out quality score counts at a particular offset of forward and reverse
|
||||
* reads, aggregated across all paired-end reads in the interval.
|
||||
*
|
||||
* @Author: Chris Hartl
|
||||
*/
|
||||
public class QualityScoreByStrandWalker extends LocusWalker<StrandedCounts,StrandedCounts> {
|
||||
@Argument(fullName="readLength", shortName="rl", doc="Maximum length of the reads in the bam file", required=true)
|
||||
int maxReadLength = -1;
|
||||
@Argument(fullName="locusCountsOutput", shortName="lcf", doc="File to print locus count information to", required=true)
|
||||
String locusOutput = null;
|
||||
@Argument(fullName="pairCountsOutput", shortName="pcf", doc="File to print pair count information to", required=true)
|
||||
String pairOutput = null;
|
||||
@Argument(fullName="useCycle", shortName="c", doc="Use cycle directly rather than strand", required=false)
|
||||
boolean useCycle = false;
|
||||
|
||||
public HashMap pairCache = new HashMap();
|
||||
|
||||
public StrandedCounts reduceInit() {
|
||||
return new StrandedCounts(maxReadLength);
|
||||
}
|
||||
|
||||
public StrandedCounts map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
StrandedCounts counts = new StrandedCounts(maxReadLength);
|
||||
updateCounts(counts,context, ref);
|
||||
return counts;
|
||||
}
|
||||
|
||||
public StrandedCounts reduce( StrandedCounts map, StrandedCounts red ) {
|
||||
map.update(red);
|
||||
return map;
|
||||
}
|
||||
|
||||
public void updateCounts( StrandedCounts counts, AlignmentContext context, ReferenceContext ref ) {
|
||||
ReadBackedPileup p = context.getPileup();
|
||||
for ( PileupElement e : p ) {
|
||||
updateLocus(counts,e,ref);
|
||||
updateReads(counts,e,ref);
|
||||
}
|
||||
}
|
||||
|
||||
public void updateLocus( StrandedCounts counts, PileupElement e, ReferenceContext ref ) {
|
||||
if ( ! useCycle ) {
|
||||
counts.updateLocus( (int) e.getQual(), ! e.getRead().getReadNegativeStrandFlag() );
|
||||
} else {
|
||||
counts.updateLocus( (int) e.getQual(), ! e.getRead().getFirstOfPairFlag() );
|
||||
}
|
||||
}
|
||||
|
||||
public void updateReads( StrandedCounts counts, PileupElement e, ReferenceContext ref ) {
|
||||
SAMRecord read = e.getRead();
|
||||
String readString = read.getReadName() + read.getReadNegativeStrandFlag();
|
||||
String mateString = read.getReadName() + !read.getReadNegativeStrandFlag();
|
||||
if ( pairCache.containsKey(readString) ) { // read is already in there
|
||||
// do nothing
|
||||
} else if ( pairCache.containsKey( mateString ) ) { // has the mate
|
||||
byte[] mate = (byte[]) pairCache.remove(mateString);
|
||||
updatePairCounts(counts,ref,e,read,mate);
|
||||
} else { // has neither read nor mate
|
||||
pairCache.put(readString, read.getBaseQualities() ); // only store qualities, should help gc going haywire
|
||||
}
|
||||
}
|
||||
|
||||
public void updatePairCounts( StrandedCounts counts, ReferenceContext ref, PileupElement e, SAMRecord read, byte[] mateQuals ) {
|
||||
byte[] readQuals = read.getBaseQualities();
|
||||
if ( ! useCycle ) {
|
||||
if ( read.getReadNegativeStrandFlag() ) {
|
||||
updateReadQualities(mateQuals,readQuals,counts);
|
||||
} else {
|
||||
updateReadQualities(readQuals,mateQuals,counts);
|
||||
}
|
||||
} else {
|
||||
if ( read.getFirstOfPairFlag() ) {
|
||||
updateReadQualities(readQuals,mateQuals,counts);
|
||||
} else {
|
||||
updateReadQualities(mateQuals,readQuals,counts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void updateReadQualities(byte[] forQuals, byte[] revQuals, StrandedCounts counts) {
|
||||
for ( int i = 0; i < forQuals.length; i ++ ) {
|
||||
counts.updateReadPair((int) forQuals[i], (int) revQuals[forQuals.length-1-i],i,forQuals.length-1-i);
|
||||
}
|
||||
}
|
||||
|
||||
public void onTraversalDone(StrandedCounts finalCounts) {
|
||||
try {
|
||||
PrintWriter locusOut = new PrintWriter(locusOutput);
|
||||
System.out.println("#$"); //delimeter
|
||||
System.out.print(finalCounts.locusCountsAsString());
|
||||
System.out.println("#$");
|
||||
System.out.println("Unmatched reads="+pairCache.size());
|
||||
System.out.println("#$");
|
||||
locusOut.printf(finalCounts.locusCountsAsString());
|
||||
PrintWriter pairOut = new PrintWriter(pairOutput);
|
||||
pairOut.printf(finalCounts.pairCountsAsString());
|
||||
System.out.println("#$");
|
||||
System.out.print(finalCounts.pairCountsAsString());
|
||||
System.out.print("#$");
|
||||
} catch ( IOException e ) {
|
||||
throw new StingException("Outputfile could not be opened");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* this class holds four arrays of longs for quality score counts
|
||||
*/
|
||||
class StrandedCounts {
|
||||
public int readLength;
|
||||
public long[][] forwardCountsByOffset;
|
||||
public long[][] reverseCountsByOffset;
|
||||
public long[] forwardCountsLocusAggregate;
|
||||
public long[] reverseCountsLocusAggregate;
|
||||
|
||||
public StrandedCounts(int maxReadLength) {
|
||||
readLength = maxReadLength;
|
||||
forwardCountsByOffset = new long[maxReadLength][QualityUtils.MAX_REASONABLE_Q_SCORE+3];
|
||||
reverseCountsByOffset = new long[maxReadLength][QualityUtils.MAX_REASONABLE_Q_SCORE+3];
|
||||
forwardCountsLocusAggregate = new long[QualityUtils.MAX_REASONABLE_Q_SCORE+3];
|
||||
reverseCountsLocusAggregate = new long[QualityUtils.MAX_REASONABLE_Q_SCORE+3];
|
||||
for ( int q = 0; q < QualityUtils.MAX_REASONABLE_Q_SCORE+3; q ++ ) {
|
||||
for ( int l = 0; l < maxReadLength; l ++ ) {
|
||||
forwardCountsByOffset[l][q] = 0l;
|
||||
reverseCountsByOffset[l][q] = 0l;
|
||||
}
|
||||
forwardCountsLocusAggregate[q] = 0l;
|
||||
reverseCountsLocusAggregate[q] = 0l;
|
||||
}
|
||||
}
|
||||
|
||||
public void updateLocus( int quality, boolean forward) {
|
||||
if ( forward ) {
|
||||
forwardCountsLocusAggregate[quality < 0 ? 0 : quality > 40 ? 40 : quality]++;
|
||||
} else {
|
||||
reverseCountsLocusAggregate[quality < 0 ? 0 : quality > 40 ? 40 : quality]++;
|
||||
}
|
||||
}
|
||||
|
||||
public void updateReadPair( int fQual, int rQual, int fOff, int rOff ) { // hehe f Off
|
||||
if ( rOff < 0 || fOff < 0 )
|
||||
throw new StingException("Offset is negative. Should never happen.");
|
||||
forwardCountsByOffset[fOff][fQual < 0 ? 0 : fQual > 40 ? 40 : fQual]++;
|
||||
reverseCountsByOffset[rOff][rQual < 0 ? 0 : rQual > 40 ? 40 : rQual]++;
|
||||
}
|
||||
|
||||
public void update( StrandedCounts otherCounts ) {
|
||||
|
||||
for ( int q = 0; q < QualityUtils.MAX_REASONABLE_Q_SCORE+3; q ++ ) {
|
||||
for ( int l = 0; l < readLength; l ++ ) {
|
||||
forwardCountsByOffset[l][q] += otherCounts.forwardCountsByOffset[l][q];
|
||||
reverseCountsByOffset[l][q] += otherCounts.reverseCountsByOffset[l][q];
|
||||
}
|
||||
|
||||
forwardCountsLocusAggregate[q] += otherCounts.forwardCountsLocusAggregate[q];
|
||||
reverseCountsLocusAggregate[q] += otherCounts.reverseCountsLocusAggregate[q];
|
||||
}
|
||||
}
|
||||
|
||||
public String pairCountsAsString() {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
StringBuffer check = new StringBuffer();
|
||||
String test = "";
|
||||
for ( int i = 0; i < readLength; i ++ ) {
|
||||
//System.out.println("APPENDING LINE: "+i);
|
||||
buf.append(i);
|
||||
check.append(i);
|
||||
test = test+i;
|
||||
for ( int j = 0; j < QualityUtils.MAX_REASONABLE_Q_SCORE+3; j ++ ) {
|
||||
buf.append("\t");
|
||||
buf.append(forwardCountsByOffset[i][j]);
|
||||
test = test+"\t"+forwardCountsByOffset[i][j];
|
||||
buf.append(";");
|
||||
buf.append(reverseCountsByOffset[i][j]);
|
||||
test = test+"\t"+reverseCountsByOffset[i][j];
|
||||
}
|
||||
test = test+"\n";
|
||||
buf.append("\n");
|
||||
check.append("\n");
|
||||
}
|
||||
//System.out.print(check.toString());
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
public String locusCountsAsString() {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
for ( int i = 0; i < forwardCountsLocusAggregate.length; i ++ ) {
|
||||
buf.append(i);
|
||||
buf.append("\t");
|
||||
buf.append(forwardCountsLocusAggregate[i]);
|
||||
buf.append("\t");
|
||||
buf.append(reverseCountsLocusAggregate[i]);
|
||||
buf.append(String.format("%s%n",""));
|
||||
}
|
||||
|
||||
return buf.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -209,6 +209,15 @@ public class QualityUtils {
|
|||
return rcCompressedQuals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the reverse of a byte array of qualities (compressed or otherwise)
|
||||
* @param quals the array of bytes to be reversed
|
||||
* @return the reverse of the quality array
|
||||
*/
|
||||
static public byte[] reverseQualityArray( byte[] quals ) {
|
||||
return BaseUtils.reverse(quals); // no sense in duplicating functionality
|
||||
}
|
||||
|
||||
// TODO --
|
||||
// TODO --
|
||||
// TODO -- stolen from SAMUtils in picard -- remove when public access is granted
|
||||
|
|
|
|||
Loading…
Reference in New Issue