From a32245f7d24c71bb5219791eea438b7ed3266787 Mon Sep 17 00:00:00 2001 From: chartl Date: Wed, 6 Jan 2010 19:18:07 +0000 Subject: [PATCH] Modifications: QualityUtils - Stole the BaseUtils code for flipping reads around and applied it to quality scores SecondBaseSkew - Nothing's really different, just a commented line Additions (experimental annotations for future development of second-base annotation) ** I DO NOT INTEND FOR ANYONE TO USE THESE ** - ProportionOfNonrefBasesSupportingSNP - ProportionOfSNPSecondBasesSupportingRef - ProportionOfRefSecondBasesSupportingSNP + I hope these are self-explanatory - QualityAdjustedSecondBaseLod + Adjust lod-score by 10*log10[P[second bases are as observed]] Added walker: QualityScoreByStrand - oneoff project that's being saved if i ever need it git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2527 348d0f76-0448-11de-a6fe-93d51630548a --- .../ProportionOfNonrefBasesSupportingSNP.java | 64 +++++ ...oportionOfRefSecondBasesSupportingSNP.java | 77 ++++++ ...oportionOfSNPSecondBasesSupportingRef.java | 80 ++++++ .../QualityAdjustedSecondBaseLod.java | 37 +++ .../walkers/annotator/SecondBaseSkew.java | 13 +- .../walkers/QualityScoreByStrandWalker.java | 230 ++++++++++++++++++ .../sting/utils/QualityUtils.java | 9 + 7 files changed, 502 insertions(+), 8 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java create mode 100644 java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java create mode 100644 java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java create mode 100644 java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualityAdjustedSecondBaseLod.java create mode 100644 java/src/org/broadinstitute/sting/oneoffprojects/walkers/QualityScoreByStrandWalker.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java new file mode 100644 index 000000000..2c0925923 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java @@ -0,0 +1,64 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; + +import java.util.Map; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Dec 17, 2009 + * Time: 2:48:15 PM + * To change this template use File | Settings | File Templates. + */ +public class ProportionOfNonrefBasesSupportingSNP implements VariantAnnotation{ + private String KEY_NAME = "prop_nonref_that_are_snp"; + + public String getKeyName() { return KEY_NAME; } + + public VCFInfoHeaderLine getDescription() { + return new VCFInfoHeaderLine(KEY_NAME, + 1,VCFInfoHeaderLine.INFO_TYPE.Float,"Simple proportion of non-reference bases that are the SNP base"); + } + + public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map context, Variation var) { + if ( ! var.isSNP() || ! var.isBiallelic() ) { + return null; + } else { + Pair totalNonref_totalSNP = new Pair(0,0); + for ( String sample : context.keySet() ) { + ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(); + totalNonref_totalSNP = getNonrefAndSNP(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalNonref_totalSNP); + + } + double p = getProportionOfNonrefBasesThatAreSNP(totalNonref_totalSNP); + return String.format("%f", p ); + } + } + + private Pair getNonrefAndSNP(ReadBackedPileup p, char ref, char snp, Pair totals) { + int[] counts = p.getBaseCounts(); + int nonrefCounts = 0; + int snpCounts = counts[BaseUtils.simpleBaseToBaseIndex(snp)]; + for ( char c : BaseUtils.BASES ) { + if ( ! BaseUtils.basesAreEqual((byte) c, (byte) ref) ) { + nonrefCounts += counts[BaseUtils.simpleBaseToBaseIndex(c)]; + } + } + + totals.first+=nonrefCounts; + totals.second+=snpCounts; + return totals; + } + + private double getProportionOfNonrefBasesThatAreSNP( Pair totalNonref_totalSNP ) { + return ( 1.0 + totalNonref_totalSNP.second ) / (1.0 + totalNonref_totalSNP.first ); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java new file mode 100644 index 000000000..2da68d331 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java @@ -0,0 +1,77 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.FileOutputStream; +import java.util.List; +import java.util.Map; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Dec 17, 2009 + * Time: 2:18:43 PM + * To change this template use File | Settings | File Templates. + */ +public class ProportionOfRefSecondBasesSupportingSNP implements VariantAnnotation{ + private String KEY_NAME = "ref_2bb_snp_prop"; + private boolean USE_MAPQ0_READS = false; + + public String getKeyName() { return KEY_NAME; } + + public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map context, Variation var) { + if ( ! var.isSNP() || ! var.isBiallelic() ) { + return null; + } else { + Pair totalAndSNPSupporting = new Pair(0,0); + for ( String sample : context.keySet() ) { + ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(); + totalAndSNPSupporting = getTotalRefAndSNPSupportCounts(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalAndSNPSupporting); + + } + double p = getProportionOfRefSecondaryBasesSupportingSNP(totalAndSNPSupporting); + return String.format("%f", p ); + } + } + + private double getProportionOfRefSecondaryBasesSupportingSNP(Pair tRef_snpSupport) { + return ( 1.0 + tRef_snpSupport.second) / (1.0 + tRef_snpSupport.first ); + } + + private Pair getTotalRefAndSNPSupportCounts(ReadBackedPileup p, char ref, char snp, Pair refAndSNPCounts) { + int nRefBases = 0; + int nSecondBasesSupportingSNP = 0; + for (PileupElement e : p ) { + if ( BaseUtils.basesAreEqual( e.getBase(), (byte) ref ) ) { + if ( BaseUtils.isRegularBase(e.getSecondBase()) ) { + nRefBases++; + if ( BaseUtils.basesAreEqual( e.getSecondBase(), (byte) snp ) ) { + nSecondBasesSupportingSNP++; + } + } + } + } + + refAndSNPCounts.first+=nRefBases; + refAndSNPCounts.second+=nSecondBasesSupportingSNP; + return refAndSNPCounts; + } + + public VCFInfoHeaderLine getDescription() { + return new VCFInfoHeaderLine(KEY_NAME, + 1,VCFInfoHeaderLine.INFO_TYPE.Float,"Simple proportion of second best base calls for reference base that support the SNP base"); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java new file mode 100644 index 000000000..5c7894949 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java @@ -0,0 +1,80 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.PileupElement; + +import java.util.Map; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Dec 17, 2009 + * Time: 2:42:05 PM + * To change this template use File | Settings | File Templates. + */ +public class ProportionOfSNPSecondBasesSupportingRef implements VariantAnnotation{ + public String KEY_NAME = "SNP_2B_SUPPORT_REF"; + public boolean USE_MAPQ0_READS = false; + public String debug_file = "/humgen/gsa-scr1/chartl/temporary/ProportionOfRefSecondBasesSupportingSNP.debug.txt"; + + public String getKeyName() { return KEY_NAME; } + + public boolean useZeroQualityReads() { return USE_MAPQ0_READS; } + + public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map context, Variation var) { + if ( ! var.isSNP() || ! var.isBiallelic() ) { + return null; + } else { + Pair totalAndSNPSupporting = new Pair(0,0); + for ( String sample : context.keySet() ) { + ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(); + totalAndSNPSupporting = getTotalSNPandRefSupporting(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalAndSNPSupporting); + + } + double p = getProportionOfSNPSecondaryBasesSupportingRef(totalAndSNPSupporting); + return String.format("%f", p ); + } + } + + public double getProportionOfSNPSecondaryBasesSupportingRef(Pair tSNP_refSupport) { + return ( 1.0 + tSNP_refSupport.second) / (1.0 + tSNP_refSupport.first ); + } + + public Pair getTotalSNPandRefSupporting(ReadBackedPileup p, char ref, char snp, Pair SNPandRefCounts) { + int nSNPBases = 0; + int nSNPBasesSupportingRef = 0; + for (PileupElement e : p ) { + if ( BaseUtils.basesAreEqual( e.getBase(), (byte) snp ) ) { + if ( hasSecondBase(e) ) { + nSNPBases++; + if ( BaseUtils.basesAreEqual( e.getSecondBase(), (byte) ref ) ) { + nSNPBasesSupportingRef++; + } + } + } + } + + SNPandRefCounts.first+=nSNPBases; + SNPandRefCounts.second+=nSNPBasesSupportingRef; + return SNPandRefCounts; + } + + + public boolean hasSecondBase(PileupElement e) { + return BaseUtils.isRegularBase(e.getSecondBase()); + } + + public VCFInfoHeaderLine getDescription() { + return new VCFInfoHeaderLine(KEY_NAME, + 1,VCFInfoHeaderLine.INFO_TYPE.Float,"Simple proportion of second best base calls for SNP base that support the Ref base"); + } + + +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualityAdjustedSecondBaseLod.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualityAdjustedSecondBaseLod.java new file mode 100644 index 000000000..dd3b43440 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualityAdjustedSecondBaseLod.java @@ -0,0 +1,37 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; + +import java.util.Map; + +/** + * Created by IntelliJ IDEA. + * User: Ghost + * Date: Dec 19, 2009 + * Time: 1:02:09 AM + * To change this template use File | Settings | File Templates. + */ +public class QualityAdjustedSecondBaseLod implements VariantAnnotation { + private final String KEY_NAME = "Qual_Adjusted_2blod"; + private final double CHI_LOD_MAX = -1000.0; + private final SecondBaseSkew skewCalc = new SecondBaseSkew(); + private final double log10e = Math.log10(Math.E); + private final double log10half = Math.log10(1.0/2); + + public String getKeyName() { return KEY_NAME; } + + public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(KEY_NAME, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Adjusted residual quality based on second-base skew"); } + + public String annotate( RefMetaDataTracker tracker, ReferenceContext ref, Map contexts, Variation variant) { + String chi = skewCalc.annotate(tracker, ref,contexts,variant); + if ( chi == null ) + return null; + double chi_square = Double.valueOf(chi); + double chi_loglik = chi_square <= 0.0 ? 0.0 : Math.max(-(chi_square/2.0)*log10e + log10half,CHI_LOD_MAX); // cap it... + return String.format("%f", 10*(variant.getNegLog10PError() + chi_loglik)); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java index 69a842a6f..501fe3f2b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java @@ -21,9 +21,9 @@ import java.util.Map; * To change this template use File | Settings | File Templates. */ public class SecondBaseSkew implements VariantAnnotation { - private final static double epsilon = Math.pow(10.0,-12); + private final static double epsilon = Math.pow(10.0,-12.0); private final static String KEY_NAME = "2b_Chi"; - private final static double[] UNIFORM_ON_OFF_RATIO = {1.0/3, 2.0/3}; + private final static double[] UNIFORM_ON_OFF_RATIO = {1.0/3.0, 2.0/3.0}; private double[] proportionExpectations = UNIFORM_ON_OFF_RATIO; public String getKeyName() { return KEY_NAME; } @@ -34,11 +34,12 @@ public class SecondBaseSkew implements VariantAnnotation { if ( !variation.isBiallelic() || !variation.isSNP() ) return null; - char snp = variation.getAlternativeBaseForSNP(); + char alternate = variation.getAlternativeBaseForSNP(); Pair depth = new Pair(0, 0); for ( String sample : stratifiedContexts.keySet() ) { - Pair sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(), stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(), snp); + //Pair sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(),stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(), alternate); + Pair sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(), stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(), alternate); depth.first += sampleDepth.first; depth.second += sampleDepth.second; } @@ -47,12 +48,8 @@ public class SecondBaseSkew implements VariantAnnotation { return null; double biasedProportion = (1.0 + depth.second) / (1.0 + depth.first); - - //System.out.printf("%d / %f%n", depthProp.getFirst(), depthProp.getSecond()); double p_transformed = transform(biasedProportion, depth.first+1); double expected_transformed = transform(proportionExpectations[0], depth.first+1); - // System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst()); - // System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]); double chi_square = Math.signum(biasedProportion - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE); return String.format("%f", chi_square); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/QualityScoreByStrandWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/QualityScoreByStrandWalker.java new file mode 100644 index 000000000..fc45dd4c8 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/QualityScoreByStrandWalker.java @@ -0,0 +1,230 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import java.io.IOException; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.playground.utils.PoolUtils; +import org.broadinstitute.sting.playground.gatk.walkers.poolseq.ReadOffsetQuad; +import sun.misc.Cache; +import net.sf.samtools.SAMRecord; + +import java.util.HashMap; +import java.io.PrintStream; +import java.io.PrintWriter; + +/** + * Created by IntelliJ IDEA. + * User: Ghost + * Date: Dec 15, 2009 + * Time: 11:56:22 AM + * To change this template use File | Settings | File Templates. + */ +/* + * This walker prints out quality score counts for forward and reverse stranded reads aggregated over all loci + * in the interval. Furthermore, it prints out quality score counts at a particular offset of forward and reverse + * reads, aggregated across all paired-end reads in the interval. + * + * @Author: Chris Hartl + */ +public class QualityScoreByStrandWalker extends LocusWalker { + @Argument(fullName="readLength", shortName="rl", doc="Maximum length of the reads in the bam file", required=true) + int maxReadLength = -1; + @Argument(fullName="locusCountsOutput", shortName="lcf", doc="File to print locus count information to", required=true) + String locusOutput = null; + @Argument(fullName="pairCountsOutput", shortName="pcf", doc="File to print pair count information to", required=true) + String pairOutput = null; + @Argument(fullName="useCycle", shortName="c", doc="Use cycle directly rather than strand", required=false) + boolean useCycle = false; + + public HashMap pairCache = new HashMap(); + + public StrandedCounts reduceInit() { + return new StrandedCounts(maxReadLength); + } + + public StrandedCounts map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + StrandedCounts counts = new StrandedCounts(maxReadLength); + updateCounts(counts,context, ref); + return counts; + } + + public StrandedCounts reduce( StrandedCounts map, StrandedCounts red ) { + map.update(red); + return map; + } + + public void updateCounts( StrandedCounts counts, AlignmentContext context, ReferenceContext ref ) { + ReadBackedPileup p = context.getPileup(); + for ( PileupElement e : p ) { + updateLocus(counts,e,ref); + updateReads(counts,e,ref); + } + } + + public void updateLocus( StrandedCounts counts, PileupElement e, ReferenceContext ref ) { + if ( ! useCycle ) { + counts.updateLocus( (int) e.getQual(), ! e.getRead().getReadNegativeStrandFlag() ); + } else { + counts.updateLocus( (int) e.getQual(), ! e.getRead().getFirstOfPairFlag() ); + } + } + + public void updateReads( StrandedCounts counts, PileupElement e, ReferenceContext ref ) { + SAMRecord read = e.getRead(); + String readString = read.getReadName() + read.getReadNegativeStrandFlag(); + String mateString = read.getReadName() + !read.getReadNegativeStrandFlag(); + if ( pairCache.containsKey(readString) ) { // read is already in there + // do nothing + } else if ( pairCache.containsKey( mateString ) ) { // has the mate + byte[] mate = (byte[]) pairCache.remove(mateString); + updatePairCounts(counts,ref,e,read,mate); + } else { // has neither read nor mate + pairCache.put(readString, read.getBaseQualities() ); // only store qualities, should help gc going haywire + } + } + + public void updatePairCounts( StrandedCounts counts, ReferenceContext ref, PileupElement e, SAMRecord read, byte[] mateQuals ) { + byte[] readQuals = read.getBaseQualities(); + if ( ! useCycle ) { + if ( read.getReadNegativeStrandFlag() ) { + updateReadQualities(mateQuals,readQuals,counts); + } else { + updateReadQualities(readQuals,mateQuals,counts); + } + } else { + if ( read.getFirstOfPairFlag() ) { + updateReadQualities(readQuals,mateQuals,counts); + } else { + updateReadQualities(mateQuals,readQuals,counts); + } + } + } + + public void updateReadQualities(byte[] forQuals, byte[] revQuals, StrandedCounts counts) { + for ( int i = 0; i < forQuals.length; i ++ ) { + counts.updateReadPair((int) forQuals[i], (int) revQuals[forQuals.length-1-i],i,forQuals.length-1-i); + } + } + + public void onTraversalDone(StrandedCounts finalCounts) { + try { + PrintWriter locusOut = new PrintWriter(locusOutput); + System.out.println("#$"); //delimeter + System.out.print(finalCounts.locusCountsAsString()); + System.out.println("#$"); + System.out.println("Unmatched reads="+pairCache.size()); + System.out.println("#$"); + locusOut.printf(finalCounts.locusCountsAsString()); + PrintWriter pairOut = new PrintWriter(pairOutput); + pairOut.printf(finalCounts.pairCountsAsString()); + System.out.println("#$"); + System.out.print(finalCounts.pairCountsAsString()); + System.out.print("#$"); + } catch ( IOException e ) { + throw new StingException("Outputfile could not be opened"); + } + } +} + +/* + * this class holds four arrays of longs for quality score counts + */ +class StrandedCounts { + public int readLength; + public long[][] forwardCountsByOffset; + public long[][] reverseCountsByOffset; + public long[] forwardCountsLocusAggregate; + public long[] reverseCountsLocusAggregate; + + public StrandedCounts(int maxReadLength) { + readLength = maxReadLength; + forwardCountsByOffset = new long[maxReadLength][QualityUtils.MAX_REASONABLE_Q_SCORE+3]; + reverseCountsByOffset = new long[maxReadLength][QualityUtils.MAX_REASONABLE_Q_SCORE+3]; + forwardCountsLocusAggregate = new long[QualityUtils.MAX_REASONABLE_Q_SCORE+3]; + reverseCountsLocusAggregate = new long[QualityUtils.MAX_REASONABLE_Q_SCORE+3]; + for ( int q = 0; q < QualityUtils.MAX_REASONABLE_Q_SCORE+3; q ++ ) { + for ( int l = 0; l < maxReadLength; l ++ ) { + forwardCountsByOffset[l][q] = 0l; + reverseCountsByOffset[l][q] = 0l; + } + forwardCountsLocusAggregate[q] = 0l; + reverseCountsLocusAggregate[q] = 0l; + } + } + + public void updateLocus( int quality, boolean forward) { + if ( forward ) { + forwardCountsLocusAggregate[quality < 0 ? 0 : quality > 40 ? 40 : quality]++; + } else { + reverseCountsLocusAggregate[quality < 0 ? 0 : quality > 40 ? 40 : quality]++; + } + } + + public void updateReadPair( int fQual, int rQual, int fOff, int rOff ) { // hehe f Off + if ( rOff < 0 || fOff < 0 ) + throw new StingException("Offset is negative. Should never happen."); + forwardCountsByOffset[fOff][fQual < 0 ? 0 : fQual > 40 ? 40 : fQual]++; + reverseCountsByOffset[rOff][rQual < 0 ? 0 : rQual > 40 ? 40 : rQual]++; + } + + public void update( StrandedCounts otherCounts ) { + + for ( int q = 0; q < QualityUtils.MAX_REASONABLE_Q_SCORE+3; q ++ ) { + for ( int l = 0; l < readLength; l ++ ) { + forwardCountsByOffset[l][q] += otherCounts.forwardCountsByOffset[l][q]; + reverseCountsByOffset[l][q] += otherCounts.reverseCountsByOffset[l][q]; + } + + forwardCountsLocusAggregate[q] += otherCounts.forwardCountsLocusAggregate[q]; + reverseCountsLocusAggregate[q] += otherCounts.reverseCountsLocusAggregate[q]; + } + } + + public String pairCountsAsString() { + StringBuffer buf = new StringBuffer(); + StringBuffer check = new StringBuffer(); + String test = ""; + for ( int i = 0; i < readLength; i ++ ) { + //System.out.println("APPENDING LINE: "+i); + buf.append(i); + check.append(i); + test = test+i; + for ( int j = 0; j < QualityUtils.MAX_REASONABLE_Q_SCORE+3; j ++ ) { + buf.append("\t"); + buf.append(forwardCountsByOffset[i][j]); + test = test+"\t"+forwardCountsByOffset[i][j]; + buf.append(";"); + buf.append(reverseCountsByOffset[i][j]); + test = test+"\t"+reverseCountsByOffset[i][j]; + } + test = test+"\n"; + buf.append("\n"); + check.append("\n"); + } + //System.out.print(check.toString()); + return buf.toString(); + } + + public String locusCountsAsString() { + StringBuffer buf = new StringBuffer(); + for ( int i = 0; i < forwardCountsLocusAggregate.length; i ++ ) { + buf.append(i); + buf.append("\t"); + buf.append(forwardCountsLocusAggregate[i]); + buf.append("\t"); + buf.append(reverseCountsLocusAggregate[i]); + buf.append(String.format("%s%n","")); + } + + return buf.toString(); + } +} diff --git a/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/java/src/org/broadinstitute/sting/utils/QualityUtils.java index 76fa1b104..6dd6ebdbb 100755 --- a/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -209,6 +209,15 @@ public class QualityUtils { return rcCompressedQuals; } + /** + * Return the reverse of a byte array of qualities (compressed or otherwise) + * @param quals the array of bytes to be reversed + * @return the reverse of the quality array + */ + static public byte[] reverseQualityArray( byte[] quals ) { + return BaseUtils.reverse(quals); // no sense in duplicating functionality + } + // TODO -- // TODO -- // TODO -- stolen from SAMUtils in picard -- remove when public access is granted