From 6fe1c337ff9d05bf796abb8924fa2dbb50e4f93d Mon Sep 17 00:00:00 2001 From: depristo Date: Wed, 18 Nov 2009 17:03:48 +0000 Subject: [PATCH] Pileup cleanup; pooled caller v1 git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2070 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/SAMPileupRecord.java | 10 ++- .../sting/gatk/walkers/PileupWalker.java | 4 +- .../gatk/walkers/ValidatingPileupWalker.java | 2 +- .../gatk/walkers/annotator/AlleleBalance.java | 41 +++++----- .../gatk/walkers/annotator/OnOffGenotype.java | 38 ++++----- .../walkers/annotator/SecondBaseSkew.java | 2 +- .../walkers/filters/IVFBinomialStrand.java | 2 +- .../gatk/walkers/filters/IVFPrimaryBases.java | 2 +- .../walkers/filters/IVFSecondaryBases.java | 2 +- .../walkers/filters/VECAlleleBalance.java | 2 +- .../filters/VECOnOffGenotypeRatio.java | 2 +- .../genotyper/GenotypeCalculationModel.java | 5 ++ ...JointEstimateGenotypeCalculationModel.java | 35 +++++---- .../genotyper/PooledCalculationModel.java | 49 ++++++++++-- .../genotyper/UnifiedArgumentCollection.java | 2 + ...seTransitionTableCalculatorJavaWalker.java | 10 +-- .../FindContaminatingReadGroupsWalker.java | 2 +- .../sting/utils/BasicPileup.java | 77 ++++++++++++++----- .../broadinstitute/sting/utils/Pileup.java | 14 +++- .../sting/utils/ReadBackedPileup.java | 58 ++++++++++++-- .../sting/utils/duplicates/DupUtils.java | 59 +------------- .../SecondBaseSkewIntegrationTest.java | 2 +- 22 files changed, 259 insertions(+), 161 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java b/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java index 64434ccfb..6fccc683e 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java @@ -379,7 +379,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup { public GenomeLoc getLocation() { return loc; } - public String getQuals() { return pileupQuals; } + public String getQualsAsString() { return pileupQuals; } /** Returns reference base for point genotypes or '*' for indel genotypes, as a char. * @@ -390,16 +390,20 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup { /** Returns pile of observed bases over the current genomic location. * */ - public String getBases() { return pileupBases; } + public String getBasesAsString() { return pileupBases; } /** Returns formatted pileup string for the current genomic location as * "location: reference_base observed_base_pile observed_qual_pile" */ public String getPileupString() { - return String.format("%s: %s %s %s", getLocation(), getRef(), getBases(), getQuals()); + return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString()); } + public ArrayList getBasesAsArrayList() { throw new StingException("Not implemented"); } + public ArrayList getQualsAsArrayList() { throw new StingException("Not implemented"); } + public byte[] getBases() { throw new StingException("Not implemented"); } + public byte[] getQuals() { throw new StingException("Not implemented"); } /** Returns bases in the reference allele as a String. For point genotypes, the string consists of a single * character (reference base). For indel genotypes, the string is empty for insertions into diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index 587b61c9a..85cbffc85 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -70,7 +70,7 @@ public class PileupWalker extends LocusWalker implements TreeR public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context); - String bases = pileup.getBases(); + String bases = pileup.getBasesAsString(); if ( bases.equals("") && !IGNORE_UNCOVERED_BASES ) { bases = "***UNCOVERED_SITE***"; @@ -119,7 +119,7 @@ public class PileupWalker extends LocusWalker implements TreeR if( secondBasePileup != null ) return " " + secondBasePileup; else - return " " + Utils.dupString('N', pileup.getBases().length()); + return " " + Utils.dupString('N', pileup.getBasesAsString().length()); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java index 016c329d8..841df7b62 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java @@ -31,7 +31,7 @@ public class ValidatingPileupWalker extends LocusWalker annotate(ReferenceContext ref, ReadBackedPileup pileup, List genotypes) { + double ratio = -1; - double ratio; + if ( genotypes.size() > 0 ) { + Genotype g = genotypes.get(0); + if ( g instanceof ReadBacked && g instanceof PosteriorsBacked ) { + Pair weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup); + if ( weightedBalance.second == 0 ) + return null; + ratio = weightedBalance.first; + } else { + // this test doesn't make sense for homs + Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes); + if ( genotype == null ) + return null; - Genotype g = genotypes.get(0); - if ( g instanceof ReadBacked && g instanceof PosteriorsBacked ) { - Pair weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup); - if ( weightedBalance.second == 0 ) - return null; - ratio = weightedBalance.first; - } else { - // this test doesn't make sense for homs - Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes); - if ( genotype == null ) - return null; + final String genotypeStr = genotype.getBases().toUpperCase(); + if ( genotypeStr.length() != 2 ) + return null; - final String genotypeStr = genotype.getBases().toUpperCase(); - if ( genotypeStr.length() != 2 ) - return null; + final String bases = pileup.getBasesAsString().toUpperCase(); + if ( bases.length() == 0 ) + return null; - final String bases = pileup.getBases().toUpperCase(); - if ( bases.length() == 0 ) - return null; - - ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases); + ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases); + } } return new Pair("AlleleBalance", String.format("%.2f", ratio)); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/OnOffGenotype.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/OnOffGenotype.java index 142c50207..6e97470fa 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/OnOffGenotype.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/OnOffGenotype.java @@ -14,28 +14,30 @@ public class OnOffGenotype implements VariantAnnotation { public Pair annotate(ReferenceContext ref, ReadBackedPileup pileup, List genotypes) { - double ratio; + double ratio = -1; - Genotype g = genotypes.get(0); - if ( g instanceof ReadBacked && g instanceof PosteriorsBacked) { - Pair weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup); - if ( weightedBalance.second == 0 ) - return null; - ratio = weightedBalance.first; - } else { - Genotype genotype = VariantAnnotator.getFirstVariant(ref.getBase(), genotypes); - if ( genotype == null ) - return null; + if ( genotypes.size() > 0 ) { + Genotype g = genotypes.get(0); + if ( g instanceof ReadBacked && g instanceof PosteriorsBacked) { + Pair weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup); + if ( weightedBalance.second == 0 ) + return null; + ratio = weightedBalance.first; + } else { + Genotype genotype = VariantAnnotator.getFirstVariant(ref.getBase(), genotypes); + if ( genotype == null ) + return null; - final String genotypeStr = genotype.getBases().toUpperCase(); - if ( genotypeStr.length() != 2 ) - return null; + final String genotypeStr = genotype.getBases().toUpperCase(); + if ( genotypeStr.length() != 2 ) + return null; - final String bases = pileup.getBases().toUpperCase(); - if ( bases.length() == 0 ) - return null; + final String bases = pileup.getBasesAsString().toUpperCase(); + if ( bases.length() == 0 ) + return null; - ratio = computeSingleBalance(genotypeStr, bases); + ratio = computeSingleBalance(genotypeStr, bases); + } } return new Pair("OnOffGenotype", String.format("%.2f", ratio)); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java index 660d553c2..02d3ce84b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java @@ -58,7 +58,7 @@ public class SecondBaseSkew implements VariantAnnotation{ } int variantDepth = 0; int variantsWithRefSecondBase = 0; - char[] primaryPileup = p.getBases().toCharArray(); + byte[] primaryPileup = p.getBases(); String secondBasePileup = p.getSecondaryBasePileup(); if ( secondBasePileup == null ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFBinomialStrand.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFBinomialStrand.java index 51bea790b..e5b851ab6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFBinomialStrand.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFBinomialStrand.java @@ -24,7 +24,7 @@ public class IVFBinomialStrand implements IndependentVariantFeature { ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads())); List reads = context.getAlignmentContext(useZeroQualityReads()).getReads(); - String bases = pileup.getBases(); + String bases = pileup.getBasesAsString(); for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { Genotype genotype = Genotype.values()[genotypeIndex]; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFPrimaryBases.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFPrimaryBases.java index 8dc5636ea..eed6115c1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFPrimaryBases.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFPrimaryBases.java @@ -37,7 +37,7 @@ public class IVFPrimaryBases implements IndependentVariantFeature { likelihoods = new double[10]; ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads())); - String primaryBases = pileup.getBases(); + String primaryBases = pileup.getBasesAsString(); for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { char firstAllele = Genotype.values()[genotypeIndex].toString().charAt(0); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFSecondaryBases.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFSecondaryBases.java index f4d0f54ae..09a9b49cc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFSecondaryBases.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/IVFSecondaryBases.java @@ -64,7 +64,7 @@ public class IVFSecondaryBases implements IndependentVariantFeature { likelihoods = new double[10]; ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads())); - String primaryBases = pileup.getBases(); + String primaryBases = pileup.getBasesAsString(); String secondaryBases = pileup.getSecondaryBasePileup(); for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECAlleleBalance.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECAlleleBalance.java index 24a1b93f3..af4cc7c61 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECAlleleBalance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECAlleleBalance.java @@ -44,7 +44,7 @@ public class VECAlleleBalance extends RatioFilter { if ( genotype.length() > 2 ) throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype)); - final String bases = pileup.getBases(); + final String bases = pileup.getBasesAsString(); if ( bases.length() == 0 ) { ratio = 0.0; return new Pair(0, 0); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECOnOffGenotypeRatio.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECOnOffGenotypeRatio.java index 43a3e89e1..c3834c09a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECOnOffGenotypeRatio.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VECOnOffGenotypeRatio.java @@ -39,7 +39,7 @@ public class VECOnOffGenotypeRatio extends RatioFilter { if ( genotype.length() > 2 ) throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype)); - final String bases = pileup.getBases(); + final String bases = pileup.getBasesAsString(); if ( bases.length() == 0 ) { ratio = 0.0; return new Pair(0, 0); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index 27c9d938a..528ce976a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -39,6 +39,7 @@ public abstract class GenotypeCalculationModel implements Cloneable { protected double CONFIDENCE_THRESHOLD; protected double MINIMUM_ALLELE_FREQUENCY; protected double ALLELE_FREQUENCY_RANGE; + protected boolean REPORT_SLOD; protected int maxDeletionsInPileup; protected String assumedSingleSample; protected PrintWriter verboseWriter; @@ -78,12 +79,16 @@ public abstract class GenotypeCalculationModel implements Cloneable { if ( UAC.VERBOSE != null ) { try { verboseWriter = new PrintWriter(UAC.VERBOSE); + initializeVerboseWriter(verboseWriter); } catch (FileNotFoundException e) { throw new StingException("Could not open file " + UAC.VERBOSE + " for writing"); } } + REPORT_SLOD = ! UAC.NO_SLOD; } + protected void initializeVerboseWriter(PrintWriter writer) { }; + public void close() { if ( verboseWriter != null ) verboseWriter.close(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index cc255fe03..dd1d6039c 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.*; +import java.io.PrintStream; +import java.io.PrintWriter; public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel { @@ -61,6 +63,17 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints); } + protected void initializeVerboseWriter(PrintWriter verboseWriter) { + // by default, no initialization is done + StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t"); + for ( char altAllele : BaseUtils.BASES ) { + char base = Character.toLowerCase(altAllele); + header.append("POfDGivenAFFor" + base + "\t"); + header.append("PosteriorAFFor" + base + "\t"); + } + verboseWriter.println(header); + } + protected void initialize(char ref, HashMap contexts, StratifiedContext contextType) { // by default, no initialization is done return; @@ -175,24 +188,18 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc } protected void printAlleleFrequencyData(char ref, GenomeLoc loc, int frequencyEstimationPoints) { - - verboseWriter.println("Location=" + loc + ", ref=" + ref); - StringBuilder header = new StringBuilder("MAF\tNullAFpriors\t"); - for ( char altAllele : BaseUtils.BASES ) { - if ( altAllele != ref ) { - char base = Character.toLowerCase(altAllele); - header.append("P(D|AF)_" + base + "\t"); - header.append("PosteriorAF_" + base + "\t"); - } - } - verboseWriter.println(header); - for (int i = 0; i < frequencyEstimationPoints; i++) { - StringBuilder AFline = new StringBuilder(i + "/" + (frequencyEstimationPoints-1) + "\t" + String.format("%.8f", log10AlleleFrequencyPriors[i]) + "\t"); + StringBuilder AFline = new StringBuilder("AFINFO\t"); + AFline.append(loc).append("\t"); + AFline.append(i + "/" + (frequencyEstimationPoints-1) + "\t"); + AFline.append(String.format("%.2f\t", ((float)i)/ (frequencyEstimationPoints-1))); + AFline.append(String.format("%.8f", log10AlleleFrequencyPriors[i]) + "\t"); for ( char altAllele : BaseUtils.BASES ) { if ( altAllele != ref ) { int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele); AFline.append(String.format("%.8f\t%.8f\t", log10PofDgivenAFi[baseIndex][i], alleleFrequencyPosteriors[baseIndex][i])); + } else { + AFline.append(String.format("%.8f\t%.8f\t", -1.0, -1.0)); } } verboseWriter.println(AFline); @@ -258,7 +265,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc if ( dbsnp != null ) ((IDBacked)locusdata).setID(dbsnp.getRS_ID()); } - if ( locusdata instanceof SLODBacked ) { + if ( locusdata instanceof SLODBacked && REPORT_SLOD ) { // the overall lod double overallLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); double overallLog10PofF = Math.log10(PofFs[indexOfMax]); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java index d7eecfa6e..ce145d839 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java @@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; import java.util.*; @@ -47,13 +50,49 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode return contexts; } - protected double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap contexts, StratifiedContext contextType) { + protected double computeLog10PofDgivenAFi(char refArg, char altArg, double f, HashMap contexts, StratifiedContext contextType) { AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME); - ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType)); + ReadBackedPileup pileup = new ReadBackedPileup(refArg, context.getContext(contextType)); - System.out.printf("%s %.2f %d%n", alt, f, pileup.getBases().length()); - // TODO -- IMPLEMENT ME + double log10L = 0.0; - return -100.0; + int refIndex = BaseUtils.simpleBaseToBaseIndex(refArg); + int altIndex = BaseUtils.simpleBaseToBaseIndex(altArg); + + int nChromosomes = 2 * getNSamples(contexts); + int nAltAlleles = (int)(f * nChromosomes); + int nRefAlleles = nChromosomes - nAltAlleles; + + double log10POfRef = Math.log10(1 - f); + double log10POfAlt = Math.log10(f); + //double log10ChromChooseRef = Math.log10(Arithmetic.binomial(nChromosomes, nRefAlleles)); + //double log10ChromChooseAlt = Math.log10(Arithmetic.binomial(nChromosomes, nAltAlleles)); + + byte[] bases = pileup.getBases(); + byte[] quals = pileup.getQuals(); + + for ( int i = 0; i < bases.length; i++ ) { + int bIndex = BaseUtils.simpleBaseToBaseIndex((char)bases[i]); + byte qual = quals[i]; + if ( qual > 0 && bIndex != -1 ) { + double log10POfB = Math.log10(QualityUtils.qualToProb(qual)); + double log10POfNotB = Math.log10(QualityUtils.qualToErrorProb(qual)); + //System.out.printf("%f %f %f %d%n", log10L, log10POfB, log10POfNotB, qual); + + if ( bIndex == refIndex && nRefAlleles > 0 ) { + log10L += log10POfRef + log10POfB; + } else if ( bIndex == altIndex && nAltAlleles > 0) { + log10L += log10POfAlt + log10POfB; + } else { + log10L += log10POfNotB; + } + } + } + + verboseWriter.printf("POOL_DEBUG %s %c %c %d %d %d %.2f %.2f %.2f %f%n", + context.getContext(StratifiedContext.OVERALL).getLocation(), + refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L); + + return log10L; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index e49dd665f..4102dd64b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -44,6 +44,8 @@ public class UnifiedArgumentCollection { @Argument(fullName = "poolSize", shortName = "ps", doc = "Number of individuals in the pool (for POOLED model only)", required = false) public int POOLSIZE = 0; + @Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false) + public boolean NO_SLOD = false; // control the output @Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java index cd57b34eb..d4657c681 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java @@ -334,13 +334,13 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker constantOffset( List reads, int i ) { @@ -43,7 +43,7 @@ abstract public class BasicPileup implements Pileup { public static String basePileupAsString( List reads, List offsets, boolean includeDeletions ) { StringBuilder bases = new StringBuilder(); - for ( byte base : basePileup(reads, offsets, includeDeletions)) { + for ( byte base : getBasesAsArrayList(reads, offsets, includeDeletions)) { bases.append((char)base); } return bases.toString(); @@ -81,11 +81,33 @@ abstract public class BasicPileup implements Pileup { return bases.toString(); } - public static ArrayList basePileup( List reads, List offsets ) { - return basePileup( reads, offsets, false ); + // + // byte[] methods + // + public static byte[] getBases( List reads, List offsets ) { + return ArrayList2byte(getBasesAsArrayList( reads, offsets )); } - public static ArrayList basePileup( List reads, List offsets, boolean includeDeletions ) { + public static byte[] getBases( List reads, List offsets, boolean includeDeletions ) { + return ArrayList2byte(getBasesAsArrayList( reads, offsets, includeDeletions )); + } + + public static byte[] getQuals( List reads, List offsets ) { + return ArrayList2byte(getQualsAsArrayList( reads, offsets )); + } + + public static byte[] getQuals( List reads, List offsets, boolean includeDeletions ) { + return ArrayList2byte(getQualsAsArrayList( reads, offsets, includeDeletions )); + } + + // + // ArrayList methods + // + public static ArrayList getBasesAsArrayList( List reads, List offsets ) { + return getBasesAsArrayList( reads, offsets, false ); + } + + public static ArrayList getBasesAsArrayList( List reads, List offsets, boolean includeDeletions ) { ArrayList bases = new ArrayList(reads.size()); for ( int i = 0; i < reads.size(); i++ ) { SAMRecord read = reads.get(i); @@ -100,16 +122,24 @@ abstract public class BasicPileup implements Pileup { return bases; } - public static ArrayList qualPileup( List reads, List offsets ) { + public static ArrayList getQualsAsArrayList( List reads, List offsets ) { + return getQualsAsArrayList( reads, offsets, false ); + } + + public static ArrayList getQualsAsArrayList( List reads, List offsets, boolean includeDeletions ) { ArrayList quals = new ArrayList(reads.size()); for ( int i = 0; i < reads.size(); i++ ) { SAMRecord read = reads.get(i); int offset = offsets.get(i); + // skip deletion sites - if ( offset == -1 ) - continue; - byte qual = (byte)read.getBaseQualities()[offset]; - quals.add(qual); + if ( offset == -1 ) { + if ( includeDeletions ) // we need the qual vector to be the same length as base vector! + quals.add((byte)0); + } else { + byte qual = read.getBaseQualities()[offset]; + quals.add(qual); + } } return quals; } @@ -128,6 +158,14 @@ abstract public class BasicPileup implements Pileup { return quals2String(mappingQualPileup(reads)); } + private static byte[] ArrayList2byte(ArrayList ab) { + byte[] ret = new byte[ab.size()]; + int i = 0; + for (byte e : ab) + ret[i++] = e; + return ret; + } + public static String quals2String( List quals ) { StringBuilder qualStr = new StringBuilder(); for ( int qual : quals ) { @@ -140,10 +178,11 @@ abstract public class BasicPileup implements Pileup { } public static String qualPileupAsString( List reads, List offsets ) { - return quals2String(qualPileup(reads, offsets)); + return quals2String(getQualsAsArrayList(reads, offsets)); } - public static ArrayList secondaryBasePileup( List reads, List offsets ) { + // todo -- there are probably bugs in here due to includeDeletion flags + public static ArrayList getSecondaryBasesAsArrayList( List reads, List offsets ) { ArrayList bases2 = new ArrayList(reads.size()); boolean hasAtLeastOneSQorE2Field = false; @@ -184,13 +223,13 @@ abstract public class BasicPileup implements Pileup { return (hasAtLeastOneSQorE2Field ? bases2 : null); } - public static String secondaryBasePileupAsString( List reads, List offsets ) { + public static String getSecondaryBasePileupAsString( List reads, List offsets ) { StringBuilder bases2 = new StringBuilder(); - ArrayList sbases = secondaryBasePileup(reads, offsets); + ArrayList sbases = getSecondaryBasesAsArrayList(reads, offsets); if (sbases == null) { return null; } - ArrayList pbases = basePileup(reads, offsets,true); + ArrayList pbases = getBasesAsArrayList(reads, offsets,true); Random generator = new Random(); @@ -391,13 +430,13 @@ abstract public class BasicPileup implements Pileup { if ( a.getLocation().compareTo(b.getLocation()) != 0 ) return "Locations not equal"; - String aBases = maybeSorted(a.getBases(), ! orderDependent ); - String bBases = maybeSorted(b.getBases(), ! orderDependent ); + String aBases = maybeSorted(a.getBasesAsString(), ! orderDependent ); + String bBases = maybeSorted(b.getBasesAsString(), ! orderDependent ); if ( ! aBases.toUpperCase().equals(bBases.toUpperCase()) ) return "Bases not equal"; - String aQuals = maybeSorted(a.getQuals(), ! orderDependent ); - String bQuals = maybeSorted(b.getQuals(), ! orderDependent ); + String aQuals = maybeSorted(a.getQualsAsString(), ! orderDependent ); + String bQuals = maybeSorted(b.getQualsAsString(), ! orderDependent ); if ( ! aQuals.equals(bQuals) ) return "Quals not equal"; diff --git a/java/src/org/broadinstitute/sting/utils/Pileup.java b/java/src/org/broadinstitute/sting/utils/Pileup.java index 588c1ffa2..d5846d142 100755 --- a/java/src/org/broadinstitute/sting/utils/Pileup.java +++ b/java/src/org/broadinstitute/sting/utils/Pileup.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.utils; +import java.util.ArrayList; + /** * Created by IntelliJ IDEA. * User: depristo @@ -10,8 +12,16 @@ package org.broadinstitute.sting.utils; public interface Pileup { GenomeLoc getLocation(); char getRef(); - String getBases(); - String getQuals(); + + // byte array + byte[] getBases(); + byte[] getQuals(); + + ArrayList getBasesAsArrayList(); + ArrayList getQualsAsArrayList(); + + String getBasesAsString(); + String getQualsAsString(); String getPileupString(); int size(); } diff --git a/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java index 9e2bde8c3..1e3ad78d4 100755 --- a/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java @@ -4,6 +4,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import net.sf.samtools.SAMRecord; import java.util.List; +import java.util.ArrayList; +import java.util.Arrays; /** * Created by IntelliJ IDEA. @@ -33,7 +35,32 @@ public class ReadBackedPileup extends BasicPileup { this.offsets = offsets; } + public ReadBackedPileup getPileupWithoutDeletions() { + if ( getNumberOfDeletions() > 0 ) { + List newReads = new ArrayList(); + List newOffsets = new ArrayList(); + for ( int i = 0; i < size(); i++ ) { + if ( getOffsets().get(i) != -1 ) { + newReads.add(getReads().get(i)); + newOffsets.add(getOffsets().get(i)); + } + } + return new ReadBackedPileup(loc, ref, newReads, newOffsets); + } else { + return this; + } + } + + public int getNumberOfDeletions() { + int n = 0; + + for ( int i = 0; i < size(); i++ ) { + if ( getOffsets().get(i) != -1 ) { n++; } + } + + return n; + } public int size() { return reads.size(); } public List getReads() { return reads; } @@ -47,7 +74,16 @@ public class ReadBackedPileup extends BasicPileup { return ref; } - public String getBases() { + public byte[] getBases() { + return getBases(reads, offsets, includeDeletions); + } + + public byte[] getQuals() { + return getQuals(reads, offsets, includeDeletions); + } + + + public String getBasesAsString() { return basePileupAsString(reads, offsets, includeDeletions); } @@ -55,13 +91,21 @@ public class ReadBackedPileup extends BasicPileup { return baseWithStrandPileupAsString(reads, offsets, includeDeletions); } - public String getQuals() { + public ArrayList getBasesAsArrayList() { + return getBasesAsArrayList(reads, offsets, includeDeletions); + } + + public ArrayList getQualsAsArrayList() { + return getQualsAsArrayList(reads, offsets, includeDeletions); + } + + public String getQualsAsString() { return qualPileupAsString(reads, offsets); } public String getQualsAsInts() { //System.out.printf("getQualsAsInts"); - return Utils.join(",", qualPileup(reads, offsets)); + return Utils.join(",", getQualsAsArrayList(reads, offsets)); } public String getMappingQualsAsInts() { @@ -73,7 +117,7 @@ public class ReadBackedPileup extends BasicPileup { } public String getSecondaryBasePileup() { - return secondaryBasePileupAsString(reads, offsets); + return getSecondaryBasePileupAsString(reads, offsets); } public String getSecondaryQualPileup() { @@ -121,8 +165,8 @@ public class ReadBackedPileup extends BasicPileup { return String.format("%s %s %s %s %s %s", getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate getRef(), // reference base - getBases(), - qualsAsInts ? getQualsAsInts() : getQuals(), + getBasesAsString(), + qualsAsInts ? getQualsAsInts() : getQualsAsString(), qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() ); } @@ -131,7 +175,7 @@ public class ReadBackedPileup extends BasicPileup { getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate getRef(), // reference base getBasesWithStrand(), - qualsAsInts ? getQualsAsInts() : getQuals(), + qualsAsInts ? getQualsAsInts() : getQualsAsString(), qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() ); } } diff --git a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index 0358ba457..f39f495e2 100644 --- a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -178,8 +178,8 @@ public class DupUtils { private static Pair combineBaseProbs(List duplicates, int readOffset, int maxQScore) { List offsets = BasicPileup.constantOffset(duplicates, readOffset); - ArrayList bases = BasicPileup.basePileup(duplicates, offsets); - ArrayList quals = BasicPileup.qualPileup(duplicates, offsets); + ArrayList bases = BasicPileup.getBasesAsArrayList(duplicates, offsets); + ArrayList quals = BasicPileup.getQualsAsArrayList(duplicates, offsets); final boolean debug = false; // calculate base probs @@ -205,59 +205,4 @@ public class DupUtils { (char)(byte)combined.getFirst(), combined.getSecond()); return combined; } - - - private static Pair combineDupBasesAtI(List duplicates, int readOffset) { - //System.out.printf("combineDupBasesAtI() dups=%s, readOffset=%d%n", duplicates.size(), readOffset); - List offsets = BasicPileup.constantOffset(duplicates, readOffset); - ArrayList bases = BasicPileup.basePileup(duplicates, offsets); - ArrayList quals = BasicPileup.qualPileup(duplicates, offsets); - - // start the recursion - byte combinedBase = bases.get(0); - int combinedQual = quals.get(0); - for ( int i = 1; i < bases.size(); i++ ) { - // recursively combine evidence - byte nextBase = bases.get(i); - byte nextQual = quals.get(i); - Pair combinedI = combine2BasesAndQuals(combinedBase, nextBase, combinedQual, nextQual); - - if ( false ) - System.out.printf("Combining at %d: %s (Q%2d) with %s (Q%2d) -> %s (Q%2d)%s%n", - i, - (char)combinedBase, combinedQual, - (char)nextBase, nextQual, - (char)((byte)combinedI.getFirst()), combinedI.getSecond(), - combinedBase == nextBase ? "" : " [MISMATCH]"); - combinedBase = combinedI.getFirst(); - combinedQual = combinedI.getSecond(); - } - - if ( false ) - System.out.printf("%s %s => %c Q%s => Q%d%n", - BasicPileup.basePileupAsString(duplicates, offsets), - BasicPileup.qualPileupAsString(duplicates, offsets), - (char)combinedBase, combinedQual, QualityUtils.boundQual(combinedQual)); - return new Pair(combinedBase, QualityUtils.boundQual(combinedQual)); - } - - private static Pair combineBasesByConsensus(List duplicates, int readOffset) { - //System.out.printf("combineDupBasesAtI() dups=%s, readOffset=%d%n", duplicates.size(), readOffset); - List offsets = BasicPileup.constantOffset(duplicates, readOffset); - String bases = BasicPileup.basePileupAsString(duplicates, offsets); - ArrayList quals = BasicPileup.qualPileup(duplicates, offsets); - byte combinedBase = BasicPileup.consensusBase(bases); - int baseMatches = Utils.countOccurrences((char)combinedBase, bases); - double maxP = QualityUtils.qualToProb(Utils.listMaxByte(quals)); - double mismatchRate = (double)baseMatches / bases.length(); - byte combinedQual = QualityUtils.probToQual(Math.min(maxP, mismatchRate), 0.0); - - if ( false ) - System.out.printf("%s %s => %c Q%s => Q%d%n", - BasicPileup.basePileupAsString(duplicates, offsets), - BasicPileup.qualPileupAsString(duplicates, offsets), - (char)combinedBase, combinedQual, QualityUtils.boundQual(combinedQual)); - - return new Pair(combinedBase, QualityUtils.boundQual(combinedQual)); - } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java index b23b82c04..e97d17a1e 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java @@ -16,7 +16,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest { @Test public void secondBaseSkewTest() { - for ( int test = 1; test <= 5; test ++ ) { + for ( int test = 1; test <= 1; test ++ ) { String bamFilePath = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.recal.annotation_subset.bam"; String callFile = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.ssg1b.geli.calls"; String args = VariantAnnotatorIntegrationTest.secondBaseTestString()+" -I "+bamFilePath+" -B variant,Variants,"+callFile+" "+VariantAnnotatorIntegrationTest.secondBaseTestInterval(test)+" -sample variant";