Pileup cleanup; pooled caller v1

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2070 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-11-18 17:03:48 +00:00
parent f0a234ab29
commit 6fe1c337ff
22 changed files with 259 additions and 161 deletions

View File

@ -379,7 +379,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
public GenomeLoc getLocation() { return loc; } public GenomeLoc getLocation() { return loc; }
public String getQuals() { return pileupQuals; } public String getQualsAsString() { return pileupQuals; }
/** Returns reference base for point genotypes or '*' for indel genotypes, as a char. /** Returns reference base for point genotypes or '*' for indel genotypes, as a char.
* *
@ -390,16 +390,20 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
/** Returns pile of observed bases over the current genomic location. /** Returns pile of observed bases over the current genomic location.
* *
*/ */
public String getBases() { return pileupBases; } public String getBasesAsString() { return pileupBases; }
/** Returns formatted pileup string for the current genomic location as /** Returns formatted pileup string for the current genomic location as
* "location: reference_base observed_base_pile observed_qual_pile" * "location: reference_base observed_base_pile observed_qual_pile"
*/ */
public String getPileupString() public String getPileupString()
{ {
return String.format("%s: %s %s %s", getLocation(), getRef(), getBases(), getQuals()); return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
} }
public ArrayList<Byte> getBasesAsArrayList() { throw new StingException("Not implemented"); }
public ArrayList<Byte> getQualsAsArrayList() { throw new StingException("Not implemented"); }
public byte[] getBases() { throw new StingException("Not implemented"); }
public byte[] getQuals() { throw new StingException("Not implemented"); }
/** Returns bases in the reference allele as a String. For point genotypes, the string consists of a single /** Returns bases in the reference allele as a String. For point genotypes, the string consists of a single
* character (reference base). For indel genotypes, the string is empty for insertions into * character (reference base). For indel genotypes, the string is empty for insertions into

View File

@ -70,7 +70,7 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context); ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
String bases = pileup.getBases(); String bases = pileup.getBasesAsString();
if ( bases.equals("") && !IGNORE_UNCOVERED_BASES ) { if ( bases.equals("") && !IGNORE_UNCOVERED_BASES ) {
bases = "***UNCOVERED_SITE***"; bases = "***UNCOVERED_SITE***";
@ -119,7 +119,7 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
if( secondBasePileup != null ) if( secondBasePileup != null )
return " " + secondBasePileup; return " " + secondBasePileup;
else else
return " " + Utils.dupString('N', pileup.getBases().length()); return " " + Utils.dupString('N', pileup.getBasesAsString().length());
} }
/** /**

View File

@ -31,7 +31,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
out.printf("No truth pileup data available at %s%n", pileup.getPileupString()); out.printf("No truth pileup data available at %s%n", pileup.getPileupString());
if ( ! CONTINUE_AFTER_AN_ERROR ) { if ( ! CONTINUE_AFTER_AN_ERROR ) {
Utils.scareUser(String.format("No pileup data available at %s given GATK's output of %s -- this walker requires samtools pileup data over all bases", Utils.scareUser(String.format("No pileup data available at %s given GATK's output of %s -- this walker requires samtools pileup data over all bases",
context.getLocation(), pileup.getBases())); context.getLocation(), pileup.getBasesAsString()));
} }
} else { } else {
String pileupDiff = BasicPileup.pileupDiff(pileup, truePileup); String pileupDiff = BasicPileup.pileupDiff(pileup, truePileup);

View File

@ -11,9 +11,9 @@ import java.util.ArrayList;
public class AlleleBalance implements VariantAnnotation { public class AlleleBalance implements VariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) { public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
double ratio = -1;
double ratio; if ( genotypes.size() > 0 ) {
Genotype g = genotypes.get(0); Genotype g = genotypes.get(0);
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked ) { if ( g instanceof ReadBacked && g instanceof PosteriorsBacked ) {
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup); Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
@ -30,12 +30,13 @@ public class AlleleBalance implements VariantAnnotation {
if ( genotypeStr.length() != 2 ) if ( genotypeStr.length() != 2 )
return null; return null;
final String bases = pileup.getBases().toUpperCase(); final String bases = pileup.getBasesAsString().toUpperCase();
if ( bases.length() == 0 ) if ( bases.length() == 0 )
return null; return null;
ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases); ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases);
} }
}
return new Pair<String, String>("AlleleBalance", String.format("%.2f", ratio)); return new Pair<String, String>("AlleleBalance", String.format("%.2f", ratio));
} }

View File

@ -14,8 +14,9 @@ public class OnOffGenotype implements VariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) { public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
double ratio; double ratio = -1;
if ( genotypes.size() > 0 ) {
Genotype g = genotypes.get(0); Genotype g = genotypes.get(0);
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked) { if ( g instanceof ReadBacked && g instanceof PosteriorsBacked) {
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup); Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
@ -31,12 +32,13 @@ public class OnOffGenotype implements VariantAnnotation {
if ( genotypeStr.length() != 2 ) if ( genotypeStr.length() != 2 )
return null; return null;
final String bases = pileup.getBases().toUpperCase(); final String bases = pileup.getBasesAsString().toUpperCase();
if ( bases.length() == 0 ) if ( bases.length() == 0 )
return null; return null;
ratio = computeSingleBalance(genotypeStr, bases); ratio = computeSingleBalance(genotypeStr, bases);
} }
}
return new Pair<String, String>("OnOffGenotype", String.format("%.2f", ratio)); return new Pair<String, String>("OnOffGenotype", String.format("%.2f", ratio));
} }

View File

@ -58,7 +58,7 @@ public class SecondBaseSkew implements VariantAnnotation{
} }
int variantDepth = 0; int variantDepth = 0;
int variantsWithRefSecondBase = 0; int variantsWithRefSecondBase = 0;
char[] primaryPileup = p.getBases().toCharArray(); byte[] primaryPileup = p.getBases();
String secondBasePileup = p.getSecondaryBasePileup(); String secondBasePileup = p.getSecondaryBasePileup();
if ( secondBasePileup == null ) { if ( secondBasePileup == null ) {

View File

@ -24,7 +24,7 @@ public class IVFBinomialStrand implements IndependentVariantFeature {
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads())); ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
List<SAMRecord> reads = context.getAlignmentContext(useZeroQualityReads()).getReads(); List<SAMRecord> reads = context.getAlignmentContext(useZeroQualityReads()).getReads();
String bases = pileup.getBases(); String bases = pileup.getBasesAsString();
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
Genotype genotype = Genotype.values()[genotypeIndex]; Genotype genotype = Genotype.values()[genotypeIndex];

View File

@ -37,7 +37,7 @@ public class IVFPrimaryBases implements IndependentVariantFeature {
likelihoods = new double[10]; likelihoods = new double[10];
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads())); ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
String primaryBases = pileup.getBases(); String primaryBases = pileup.getBasesAsString();
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
char firstAllele = Genotype.values()[genotypeIndex].toString().charAt(0); char firstAllele = Genotype.values()[genotypeIndex].toString().charAt(0);

View File

@ -64,7 +64,7 @@ public class IVFSecondaryBases implements IndependentVariantFeature {
likelihoods = new double[10]; likelihoods = new double[10];
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads())); ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
String primaryBases = pileup.getBases(); String primaryBases = pileup.getBasesAsString();
String secondaryBases = pileup.getSecondaryBasePileup(); String secondaryBases = pileup.getSecondaryBasePileup();
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {

View File

@ -44,7 +44,7 @@ public class VECAlleleBalance extends RatioFilter {
if ( genotype.length() > 2 ) if ( genotype.length() > 2 )
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype)); throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
final String bases = pileup.getBases(); final String bases = pileup.getBasesAsString();
if ( bases.length() == 0 ) { if ( bases.length() == 0 ) {
ratio = 0.0; ratio = 0.0;
return new Pair<Integer, Integer>(0, 0); return new Pair<Integer, Integer>(0, 0);

View File

@ -39,7 +39,7 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
if ( genotype.length() > 2 ) if ( genotype.length() > 2 )
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype)); throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
final String bases = pileup.getBases(); final String bases = pileup.getBasesAsString();
if ( bases.length() == 0 ) { if ( bases.length() == 0 ) {
ratio = 0.0; ratio = 0.0;
return new Pair<Integer, Integer>(0, 0); return new Pair<Integer, Integer>(0, 0);

View File

@ -39,6 +39,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected double CONFIDENCE_THRESHOLD; protected double CONFIDENCE_THRESHOLD;
protected double MINIMUM_ALLELE_FREQUENCY; protected double MINIMUM_ALLELE_FREQUENCY;
protected double ALLELE_FREQUENCY_RANGE; protected double ALLELE_FREQUENCY_RANGE;
protected boolean REPORT_SLOD;
protected int maxDeletionsInPileup; protected int maxDeletionsInPileup;
protected String assumedSingleSample; protected String assumedSingleSample;
protected PrintWriter verboseWriter; protected PrintWriter verboseWriter;
@ -78,12 +79,16 @@ public abstract class GenotypeCalculationModel implements Cloneable {
if ( UAC.VERBOSE != null ) { if ( UAC.VERBOSE != null ) {
try { try {
verboseWriter = new PrintWriter(UAC.VERBOSE); verboseWriter = new PrintWriter(UAC.VERBOSE);
initializeVerboseWriter(verboseWriter);
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new StingException("Could not open file " + UAC.VERBOSE + " for writing"); throw new StingException("Could not open file " + UAC.VERBOSE + " for writing");
} }
} }
REPORT_SLOD = ! UAC.NO_SLOD;
} }
protected void initializeVerboseWriter(PrintWriter writer) { };
public void close() { public void close() {
if ( verboseWriter != null ) if ( verboseWriter != null )
verboseWriter.close(); verboseWriter.close();

View File

@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*; import java.util.*;
import java.io.PrintStream;
import java.io.PrintWriter;
public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel { public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {
@ -61,6 +63,17 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints); return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints);
} }
protected void initializeVerboseWriter(PrintWriter verboseWriter) {
// by default, no initialization is done
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
for ( char altAllele : BaseUtils.BASES ) {
char base = Character.toLowerCase(altAllele);
header.append("POfDGivenAFFor" + base + "\t");
header.append("PosteriorAFFor" + base + "\t");
}
verboseWriter.println(header);
}
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) { protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
// by default, no initialization is done // by default, no initialization is done
return; return;
@ -175,24 +188,18 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
} }
protected void printAlleleFrequencyData(char ref, GenomeLoc loc, int frequencyEstimationPoints) { protected void printAlleleFrequencyData(char ref, GenomeLoc loc, int frequencyEstimationPoints) {
verboseWriter.println("Location=" + loc + ", ref=" + ref);
StringBuilder header = new StringBuilder("MAF\tNullAFpriors\t");
for ( char altAllele : BaseUtils.BASES ) {
if ( altAllele != ref ) {
char base = Character.toLowerCase(altAllele);
header.append("P(D|AF)_" + base + "\t");
header.append("PosteriorAF_" + base + "\t");
}
}
verboseWriter.println(header);
for (int i = 0; i < frequencyEstimationPoints; i++) { for (int i = 0; i < frequencyEstimationPoints; i++) {
StringBuilder AFline = new StringBuilder(i + "/" + (frequencyEstimationPoints-1) + "\t" + String.format("%.8f", log10AlleleFrequencyPriors[i]) + "\t"); StringBuilder AFline = new StringBuilder("AFINFO\t");
AFline.append(loc).append("\t");
AFline.append(i + "/" + (frequencyEstimationPoints-1) + "\t");
AFline.append(String.format("%.2f\t", ((float)i)/ (frequencyEstimationPoints-1)));
AFline.append(String.format("%.8f", log10AlleleFrequencyPriors[i]) + "\t");
for ( char altAllele : BaseUtils.BASES ) { for ( char altAllele : BaseUtils.BASES ) {
if ( altAllele != ref ) { if ( altAllele != ref ) {
int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele); int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
AFline.append(String.format("%.8f\t%.8f\t", log10PofDgivenAFi[baseIndex][i], alleleFrequencyPosteriors[baseIndex][i])); AFline.append(String.format("%.8f\t%.8f\t", log10PofDgivenAFi[baseIndex][i], alleleFrequencyPosteriors[baseIndex][i]));
} else {
AFline.append(String.format("%.8f\t%.8f\t", -1.0, -1.0));
} }
} }
verboseWriter.println(AFline); verboseWriter.println(AFline);
@ -258,7 +265,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
if ( dbsnp != null ) if ( dbsnp != null )
((IDBacked)locusdata).setID(dbsnp.getRS_ID()); ((IDBacked)locusdata).setID(dbsnp.getRS_ID());
} }
if ( locusdata instanceof SLODBacked ) { if ( locusdata instanceof SLODBacked && REPORT_SLOD ) {
// the overall lod // the overall lod
double overallLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); double overallLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
double overallLog10PofF = Math.log10(PofFs[indexOfMax]); double overallLog10PofF = Math.log10(PofFs[indexOfMax]);

View File

@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.*; import java.util.*;
@ -47,13 +50,49 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
return contexts; return contexts;
} }
protected double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) { protected double computeLog10PofDgivenAFi(char refArg, char altArg, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME); AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType)); ReadBackedPileup pileup = new ReadBackedPileup(refArg, context.getContext(contextType));
System.out.printf("%s %.2f %d%n", alt, f, pileup.getBases().length()); double log10L = 0.0;
// TODO -- IMPLEMENT ME
return -100.0; int refIndex = BaseUtils.simpleBaseToBaseIndex(refArg);
int altIndex = BaseUtils.simpleBaseToBaseIndex(altArg);
int nChromosomes = 2 * getNSamples(contexts);
int nAltAlleles = (int)(f * nChromosomes);
int nRefAlleles = nChromosomes - nAltAlleles;
double log10POfRef = Math.log10(1 - f);
double log10POfAlt = Math.log10(f);
//double log10ChromChooseRef = Math.log10(Arithmetic.binomial(nChromosomes, nRefAlleles));
//double log10ChromChooseAlt = Math.log10(Arithmetic.binomial(nChromosomes, nAltAlleles));
byte[] bases = pileup.getBases();
byte[] quals = pileup.getQuals();
for ( int i = 0; i < bases.length; i++ ) {
int bIndex = BaseUtils.simpleBaseToBaseIndex((char)bases[i]);
byte qual = quals[i];
if ( qual > 0 && bIndex != -1 ) {
double log10POfB = Math.log10(QualityUtils.qualToProb(qual));
double log10POfNotB = Math.log10(QualityUtils.qualToErrorProb(qual));
//System.out.printf("%f %f %f %d%n", log10L, log10POfB, log10POfNotB, qual);
if ( bIndex == refIndex && nRefAlleles > 0 ) {
log10L += log10POfRef + log10POfB;
} else if ( bIndex == altIndex && nAltAlleles > 0) {
log10L += log10POfAlt + log10POfB;
} else {
log10L += log10POfNotB;
}
}
}
verboseWriter.printf("POOL_DEBUG %s %c %c %d %d %d %.2f %.2f %.2f %f%n",
context.getContext(StratifiedContext.OVERALL).getLocation(),
refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L);
return log10L;
} }
} }

View File

@ -44,6 +44,8 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "poolSize", shortName = "ps", doc = "Number of individuals in the pool (for POOLED model only)", required = false) @Argument(fullName = "poolSize", shortName = "ps", doc = "Number of individuals in the pool (for POOLED model only)", required = false)
public int POOLSIZE = 0; public int POOLSIZE = 0;
@Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false)
public boolean NO_SLOD = false;
// control the output // control the output
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false) @Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false)

View File

@ -334,13 +334,13 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
int refM = 0; int refM = 0;
switch (Character.toUpperCase(p.getRef())) { switch (Character.toUpperCase(p.getRef())) {
case 'A': refM = BasicPileup.countBases(p.getBases()).a; case 'A': refM = BasicPileup.countBases(p.getBasesAsString()).a;
break; break;
case 'C': refM = BasicPileup.countBases(p.getBases()).c; case 'C': refM = BasicPileup.countBases(p.getBasesAsString()).c;
break; break;
case 'G': refM = BasicPileup.countBases(p.getBases()).g; case 'G': refM = BasicPileup.countBases(p.getBasesAsString()).g;
break; break;
case 'T': refM = BasicPileup.countBases(p.getBases()).t; case 'T': refM = BasicPileup.countBases(p.getBasesAsString()).t;
break; break;
default: default:
throw new StingException("Exception in countMismatches - this has been called for a non-reference base. Base character from pileup was " + p.getRef() ); throw new StingException("Exception in countMismatches - this has been called for a non-reference base. Base character from pileup was " + p.getRef() );
@ -362,7 +362,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
} }
public boolean pileupContainsNoNs(ReadBackedPileup pileup) { public boolean pileupContainsNoNs(ReadBackedPileup pileup) {
for ( char c : pileup.getBases().toCharArray() ) { for ( byte c : pileup.getBases() ) {
if ( c == 'N' ) { if ( c == 'N' ) {
return false; return false;
} }

View File

@ -72,7 +72,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context); ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase()); int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
for (byte base : pileup.getBases().getBytes()) { for (byte base : pileup.getBases() ) {
int baseIndex = BaseUtils.simpleBaseToBaseIndex((char) base); int baseIndex = BaseUtils.simpleBaseToBaseIndex((char) base);
if (baseIndex != refIndex) { if (baseIndex != refIndex) {

View File

@ -26,7 +26,7 @@ abstract public class BasicPileup implements Pileup {
public String getPileupString() public String getPileupString()
{ {
return String.format("%s: %s %s %s", getLocation(), getRef(), getBases(), getQuals()); return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
} }
public static List<Integer> constantOffset( List<SAMRecord> reads, int i ) { public static List<Integer> constantOffset( List<SAMRecord> reads, int i ) {
@ -43,7 +43,7 @@ abstract public class BasicPileup implements Pileup {
public static String basePileupAsString( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) { public static String basePileupAsString( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
StringBuilder bases = new StringBuilder(); StringBuilder bases = new StringBuilder();
for ( byte base : basePileup(reads, offsets, includeDeletions)) { for ( byte base : getBasesAsArrayList(reads, offsets, includeDeletions)) {
bases.append((char)base); bases.append((char)base);
} }
return bases.toString(); return bases.toString();
@ -81,11 +81,33 @@ abstract public class BasicPileup implements Pileup {
return bases.toString(); return bases.toString();
} }
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets ) { //
return basePileup( reads, offsets, false ); // byte[] methods
//
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) {
return ArrayList2byte(getBasesAsArrayList( reads, offsets ));
} }
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) { public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
return ArrayList2byte(getBasesAsArrayList( reads, offsets, includeDeletions ));
}
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) {
return ArrayList2byte(getQualsAsArrayList( reads, offsets ));
}
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
return ArrayList2byte(getQualsAsArrayList( reads, offsets, includeDeletions ));
}
//
// ArrayList<Byte> methods
//
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
return getBasesAsArrayList( reads, offsets, false );
}
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size()); ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
for ( int i = 0; i < reads.size(); i++ ) { for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i); SAMRecord read = reads.get(i);
@ -100,17 +122,25 @@ abstract public class BasicPileup implements Pileup {
return bases; return bases;
} }
public static ArrayList<Byte> qualPileup( List<SAMRecord> reads, List<Integer> offsets ) { public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
return getQualsAsArrayList( reads, offsets, false );
}
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size()); ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
for ( int i = 0; i < reads.size(); i++ ) { for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i); SAMRecord read = reads.get(i);
int offset = offsets.get(i); int offset = offsets.get(i);
// skip deletion sites // skip deletion sites
if ( offset == -1 ) if ( offset == -1 ) {
continue; if ( includeDeletions ) // we need the qual vector to be the same length as base vector!
byte qual = (byte)read.getBaseQualities()[offset]; quals.add((byte)0);
} else {
byte qual = read.getBaseQualities()[offset];
quals.add(qual); quals.add(qual);
} }
}
return quals; return quals;
} }
@ -128,6 +158,14 @@ abstract public class BasicPileup implements Pileup {
return quals2String(mappingQualPileup(reads)); return quals2String(mappingQualPileup(reads));
} }
private static byte[] ArrayList2byte(ArrayList<Byte> ab) {
byte[] ret = new byte[ab.size()];
int i = 0;
for (byte e : ab)
ret[i++] = e;
return ret;
}
public static String quals2String( List<Byte> quals ) { public static String quals2String( List<Byte> quals ) {
StringBuilder qualStr = new StringBuilder(); StringBuilder qualStr = new StringBuilder();
for ( int qual : quals ) { for ( int qual : quals ) {
@ -140,10 +178,11 @@ abstract public class BasicPileup implements Pileup {
} }
public static String qualPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) { public static String qualPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
return quals2String(qualPileup(reads, offsets)); return quals2String(getQualsAsArrayList(reads, offsets));
} }
public static ArrayList<Byte> secondaryBasePileup( List<SAMRecord> reads, List<Integer> offsets ) { // todo -- there are probably bugs in here due to includeDeletion flags
public static ArrayList<Byte> getSecondaryBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
ArrayList<Byte> bases2 = new ArrayList<Byte>(reads.size()); ArrayList<Byte> bases2 = new ArrayList<Byte>(reads.size());
boolean hasAtLeastOneSQorE2Field = false; boolean hasAtLeastOneSQorE2Field = false;
@ -184,13 +223,13 @@ abstract public class BasicPileup implements Pileup {
return (hasAtLeastOneSQorE2Field ? bases2 : null); return (hasAtLeastOneSQorE2Field ? bases2 : null);
} }
public static String secondaryBasePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) { public static String getSecondaryBasePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
StringBuilder bases2 = new StringBuilder(); StringBuilder bases2 = new StringBuilder();
ArrayList<Byte> sbases = secondaryBasePileup(reads, offsets); ArrayList<Byte> sbases = getSecondaryBasesAsArrayList(reads, offsets);
if (sbases == null) { return null; } if (sbases == null) { return null; }
ArrayList<Byte> pbases = basePileup(reads, offsets,true); ArrayList<Byte> pbases = getBasesAsArrayList(reads, offsets,true);
Random generator = new Random(); Random generator = new Random();
@ -391,13 +430,13 @@ abstract public class BasicPileup implements Pileup {
if ( a.getLocation().compareTo(b.getLocation()) != 0 ) if ( a.getLocation().compareTo(b.getLocation()) != 0 )
return "Locations not equal"; return "Locations not equal";
String aBases = maybeSorted(a.getBases(), ! orderDependent ); String aBases = maybeSorted(a.getBasesAsString(), ! orderDependent );
String bBases = maybeSorted(b.getBases(), ! orderDependent ); String bBases = maybeSorted(b.getBasesAsString(), ! orderDependent );
if ( ! aBases.toUpperCase().equals(bBases.toUpperCase()) ) if ( ! aBases.toUpperCase().equals(bBases.toUpperCase()) )
return "Bases not equal"; return "Bases not equal";
String aQuals = maybeSorted(a.getQuals(), ! orderDependent ); String aQuals = maybeSorted(a.getQualsAsString(), ! orderDependent );
String bQuals = maybeSorted(b.getQuals(), ! orderDependent ); String bQuals = maybeSorted(b.getQualsAsString(), ! orderDependent );
if ( ! aQuals.equals(bQuals) ) if ( ! aQuals.equals(bQuals) )
return "Quals not equal"; return "Quals not equal";

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.utils; package org.broadinstitute.sting.utils;
import java.util.ArrayList;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
* User: depristo * User: depristo
@ -10,8 +12,16 @@ package org.broadinstitute.sting.utils;
public interface Pileup { public interface Pileup {
GenomeLoc getLocation(); GenomeLoc getLocation();
char getRef(); char getRef();
String getBases();
String getQuals(); // byte array
byte[] getBases();
byte[] getQuals();
ArrayList<Byte> getBasesAsArrayList();
ArrayList<Byte> getQualsAsArrayList();
String getBasesAsString();
String getQualsAsString();
String getPileupString(); String getPileupString();
int size(); int size();
} }

View File

@ -4,6 +4,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import java.util.List; import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -33,7 +35,32 @@ public class ReadBackedPileup extends BasicPileup {
this.offsets = offsets; this.offsets = offsets;
} }
public ReadBackedPileup getPileupWithoutDeletions() {
if ( getNumberOfDeletions() > 0 ) {
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
List<Integer> newOffsets = new ArrayList<Integer>();
for ( int i = 0; i < size(); i++ ) {
if ( getOffsets().get(i) != -1 ) {
newReads.add(getReads().get(i));
newOffsets.add(getOffsets().get(i));
}
}
return new ReadBackedPileup(loc, ref, newReads, newOffsets);
} else {
return this;
}
}
public int getNumberOfDeletions() {
int n = 0;
for ( int i = 0; i < size(); i++ ) {
if ( getOffsets().get(i) != -1 ) { n++; }
}
return n;
}
public int size() { return reads.size(); } public int size() { return reads.size(); }
public List<SAMRecord> getReads() { return reads; } public List<SAMRecord> getReads() { return reads; }
@ -47,7 +74,16 @@ public class ReadBackedPileup extends BasicPileup {
return ref; return ref;
} }
public String getBases() { public byte[] getBases() {
return getBases(reads, offsets, includeDeletions);
}
public byte[] getQuals() {
return getQuals(reads, offsets, includeDeletions);
}
public String getBasesAsString() {
return basePileupAsString(reads, offsets, includeDeletions); return basePileupAsString(reads, offsets, includeDeletions);
} }
@ -55,13 +91,21 @@ public class ReadBackedPileup extends BasicPileup {
return baseWithStrandPileupAsString(reads, offsets, includeDeletions); return baseWithStrandPileupAsString(reads, offsets, includeDeletions);
} }
public String getQuals() { public ArrayList<Byte> getBasesAsArrayList() {
return getBasesAsArrayList(reads, offsets, includeDeletions);
}
public ArrayList<Byte> getQualsAsArrayList() {
return getQualsAsArrayList(reads, offsets, includeDeletions);
}
public String getQualsAsString() {
return qualPileupAsString(reads, offsets); return qualPileupAsString(reads, offsets);
} }
public String getQualsAsInts() { public String getQualsAsInts() {
//System.out.printf("getQualsAsInts"); //System.out.printf("getQualsAsInts");
return Utils.join(",", qualPileup(reads, offsets)); return Utils.join(",", getQualsAsArrayList(reads, offsets));
} }
public String getMappingQualsAsInts() { public String getMappingQualsAsInts() {
@ -73,7 +117,7 @@ public class ReadBackedPileup extends BasicPileup {
} }
public String getSecondaryBasePileup() { public String getSecondaryBasePileup() {
return secondaryBasePileupAsString(reads, offsets); return getSecondaryBasePileupAsString(reads, offsets);
} }
public String getSecondaryQualPileup() { public String getSecondaryQualPileup() {
@ -121,8 +165,8 @@ public class ReadBackedPileup extends BasicPileup {
return String.format("%s %s %s %s %s %s", return String.format("%s %s %s %s %s %s",
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
getRef(), // reference base getRef(), // reference base
getBases(), getBasesAsString(),
qualsAsInts ? getQualsAsInts() : getQuals(), qualsAsInts ? getQualsAsInts() : getQualsAsString(),
qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() ); qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() );
} }
@ -131,7 +175,7 @@ public class ReadBackedPileup extends BasicPileup {
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
getRef(), // reference base getRef(), // reference base
getBasesWithStrand(), getBasesWithStrand(),
qualsAsInts ? getQualsAsInts() : getQuals(), qualsAsInts ? getQualsAsInts() : getQualsAsString(),
qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() ); qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() );
} }
} }

View File

@ -178,8 +178,8 @@ public class DupUtils {
private static Pair<Byte, Byte> combineBaseProbs(List<SAMRecord> duplicates, int readOffset, int maxQScore) { private static Pair<Byte, Byte> combineBaseProbs(List<SAMRecord> duplicates, int readOffset, int maxQScore) {
List<Integer> offsets = BasicPileup.constantOffset(duplicates, readOffset); List<Integer> offsets = BasicPileup.constantOffset(duplicates, readOffset);
ArrayList<Byte> bases = BasicPileup.basePileup(duplicates, offsets); ArrayList<Byte> bases = BasicPileup.getBasesAsArrayList(duplicates, offsets);
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets); ArrayList<Byte> quals = BasicPileup.getQualsAsArrayList(duplicates, offsets);
final boolean debug = false; final boolean debug = false;
// calculate base probs // calculate base probs
@ -205,59 +205,4 @@ public class DupUtils {
(char)(byte)combined.getFirst(), combined.getSecond()); (char)(byte)combined.getFirst(), combined.getSecond());
return combined; return combined;
} }
private static Pair<Byte, Byte> combineDupBasesAtI(List<SAMRecord> duplicates, int readOffset) {
//System.out.printf("combineDupBasesAtI() dups=%s, readOffset=%d%n", duplicates.size(), readOffset);
List<Integer> offsets = BasicPileup.constantOffset(duplicates, readOffset);
ArrayList<Byte> bases = BasicPileup.basePileup(duplicates, offsets);
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
// start the recursion
byte combinedBase = bases.get(0);
int combinedQual = quals.get(0);
for ( int i = 1; i < bases.size(); i++ ) {
// recursively combine evidence
byte nextBase = bases.get(i);
byte nextQual = quals.get(i);
Pair<Byte, Integer> combinedI = combine2BasesAndQuals(combinedBase, nextBase, combinedQual, nextQual);
if ( false )
System.out.printf("Combining at %d: %s (Q%2d) with %s (Q%2d) -> %s (Q%2d)%s%n",
i,
(char)combinedBase, combinedQual,
(char)nextBase, nextQual,
(char)((byte)combinedI.getFirst()), combinedI.getSecond(),
combinedBase == nextBase ? "" : " [MISMATCH]");
combinedBase = combinedI.getFirst();
combinedQual = combinedI.getSecond();
}
if ( false )
System.out.printf("%s %s => %c Q%s => Q%d%n",
BasicPileup.basePileupAsString(duplicates, offsets),
BasicPileup.qualPileupAsString(duplicates, offsets),
(char)combinedBase, combinedQual, QualityUtils.boundQual(combinedQual));
return new Pair<Byte, Byte>(combinedBase, QualityUtils.boundQual(combinedQual));
}
private static Pair<Byte, Byte> combineBasesByConsensus(List<SAMRecord> duplicates, int readOffset) {
//System.out.printf("combineDupBasesAtI() dups=%s, readOffset=%d%n", duplicates.size(), readOffset);
List<Integer> offsets = BasicPileup.constantOffset(duplicates, readOffset);
String bases = BasicPileup.basePileupAsString(duplicates, offsets);
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
byte combinedBase = BasicPileup.consensusBase(bases);
int baseMatches = Utils.countOccurrences((char)combinedBase, bases);
double maxP = QualityUtils.qualToProb(Utils.listMaxByte(quals));
double mismatchRate = (double)baseMatches / bases.length();
byte combinedQual = QualityUtils.probToQual(Math.min(maxP, mismatchRate), 0.0);
if ( false )
System.out.printf("%s %s => %c Q%s => Q%d%n",
BasicPileup.basePileupAsString(duplicates, offsets),
BasicPileup.qualPileupAsString(duplicates, offsets),
(char)combinedBase, combinedQual, QualityUtils.boundQual(combinedQual));
return new Pair<Byte, Byte>(combinedBase, QualityUtils.boundQual(combinedQual));
}
} }

View File

@ -16,7 +16,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest {
@Test @Test
public void secondBaseSkewTest() { public void secondBaseSkewTest() {
for ( int test = 1; test <= 5; test ++ ) { for ( int test = 1; test <= 1; test ++ ) {
String bamFilePath = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.recal.annotation_subset.bam"; String bamFilePath = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.recal.annotation_subset.bam";
String callFile = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.ssg1b.geli.calls"; String callFile = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.ssg1b.geli.calls";
String args = VariantAnnotatorIntegrationTest.secondBaseTestString()+" -I "+bamFilePath+" -B variant,Variants,"+callFile+" "+VariantAnnotatorIntegrationTest.secondBaseTestInterval(test)+" -sample variant"; String args = VariantAnnotatorIntegrationTest.secondBaseTestString()+" -I "+bamFilePath+" -B variant,Variants,"+callFile+" "+VariantAnnotatorIntegrationTest.secondBaseTestInterval(test)+" -sample variant";