Pileup cleanup; pooled caller v1

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2070 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-11-18 17:03:48 +00:00
parent f0a234ab29
commit 6fe1c337ff
22 changed files with 259 additions and 161 deletions

View File

@ -379,7 +379,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
public GenomeLoc getLocation() { return loc; }
public String getQuals() { return pileupQuals; }
public String getQualsAsString() { return pileupQuals; }
/** Returns reference base for point genotypes or '*' for indel genotypes, as a char.
*
@ -390,16 +390,20 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
/** Returns pile of observed bases over the current genomic location.
*
*/
public String getBases() { return pileupBases; }
public String getBasesAsString() { return pileupBases; }
/** Returns formatted pileup string for the current genomic location as
* "location: reference_base observed_base_pile observed_qual_pile"
*/
public String getPileupString()
{
return String.format("%s: %s %s %s", getLocation(), getRef(), getBases(), getQuals());
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
}
public ArrayList<Byte> getBasesAsArrayList() { throw new StingException("Not implemented"); }
public ArrayList<Byte> getQualsAsArrayList() { throw new StingException("Not implemented"); }
public byte[] getBases() { throw new StingException("Not implemented"); }
public byte[] getQuals() { throw new StingException("Not implemented"); }
/** Returns bases in the reference allele as a String. For point genotypes, the string consists of a single
* character (reference base). For indel genotypes, the string is empty for insertions into

View File

@ -70,7 +70,7 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
String bases = pileup.getBases();
String bases = pileup.getBasesAsString();
if ( bases.equals("") && !IGNORE_UNCOVERED_BASES ) {
bases = "***UNCOVERED_SITE***";
@ -119,7 +119,7 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
if( secondBasePileup != null )
return " " + secondBasePileup;
else
return " " + Utils.dupString('N', pileup.getBases().length());
return " " + Utils.dupString('N', pileup.getBasesAsString().length());
}
/**

View File

@ -31,7 +31,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
out.printf("No truth pileup data available at %s%n", pileup.getPileupString());
if ( ! CONTINUE_AFTER_AN_ERROR ) {
Utils.scareUser(String.format("No pileup data available at %s given GATK's output of %s -- this walker requires samtools pileup data over all bases",
context.getLocation(), pileup.getBases()));
context.getLocation(), pileup.getBasesAsString()));
}
} else {
String pileupDiff = BasicPileup.pileupDiff(pileup, truePileup);

View File

@ -11,30 +11,31 @@ import java.util.ArrayList;
public class AlleleBalance implements VariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
double ratio = -1;
double ratio;
if ( genotypes.size() > 0 ) {
Genotype g = genotypes.get(0);
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked ) {
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
if ( weightedBalance.second == 0 )
return null;
ratio = weightedBalance.first;
} else {
// this test doesn't make sense for homs
Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes);
if ( genotype == null )
return null;
Genotype g = genotypes.get(0);
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked ) {
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
if ( weightedBalance.second == 0 )
return null;
ratio = weightedBalance.first;
} else {
// this test doesn't make sense for homs
Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes);
if ( genotype == null )
return null;
final String genotypeStr = genotype.getBases().toUpperCase();
if ( genotypeStr.length() != 2 )
return null;
final String genotypeStr = genotype.getBases().toUpperCase();
if ( genotypeStr.length() != 2 )
return null;
final String bases = pileup.getBasesAsString().toUpperCase();
if ( bases.length() == 0 )
return null;
final String bases = pileup.getBases().toUpperCase();
if ( bases.length() == 0 )
return null;
ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases);
ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases);
}
}
return new Pair<String, String>("AlleleBalance", String.format("%.2f", ratio));

View File

@ -14,28 +14,30 @@ public class OnOffGenotype implements VariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
double ratio;
double ratio = -1;
Genotype g = genotypes.get(0);
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked) {
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
if ( weightedBalance.second == 0 )
return null;
ratio = weightedBalance.first;
} else {
Genotype genotype = VariantAnnotator.getFirstVariant(ref.getBase(), genotypes);
if ( genotype == null )
return null;
if ( genotypes.size() > 0 ) {
Genotype g = genotypes.get(0);
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked) {
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
if ( weightedBalance.second == 0 )
return null;
ratio = weightedBalance.first;
} else {
Genotype genotype = VariantAnnotator.getFirstVariant(ref.getBase(), genotypes);
if ( genotype == null )
return null;
final String genotypeStr = genotype.getBases().toUpperCase();
if ( genotypeStr.length() != 2 )
return null;
final String genotypeStr = genotype.getBases().toUpperCase();
if ( genotypeStr.length() != 2 )
return null;
final String bases = pileup.getBases().toUpperCase();
if ( bases.length() == 0 )
return null;
final String bases = pileup.getBasesAsString().toUpperCase();
if ( bases.length() == 0 )
return null;
ratio = computeSingleBalance(genotypeStr, bases);
ratio = computeSingleBalance(genotypeStr, bases);
}
}
return new Pair<String, String>("OnOffGenotype", String.format("%.2f", ratio));

View File

@ -58,7 +58,7 @@ public class SecondBaseSkew implements VariantAnnotation{
}
int variantDepth = 0;
int variantsWithRefSecondBase = 0;
char[] primaryPileup = p.getBases().toCharArray();
byte[] primaryPileup = p.getBases();
String secondBasePileup = p.getSecondaryBasePileup();
if ( secondBasePileup == null ) {

View File

@ -24,7 +24,7 @@ public class IVFBinomialStrand implements IndependentVariantFeature {
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
List<SAMRecord> reads = context.getAlignmentContext(useZeroQualityReads()).getReads();
String bases = pileup.getBases();
String bases = pileup.getBasesAsString();
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
Genotype genotype = Genotype.values()[genotypeIndex];

View File

@ -37,7 +37,7 @@ public class IVFPrimaryBases implements IndependentVariantFeature {
likelihoods = new double[10];
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
String primaryBases = pileup.getBases();
String primaryBases = pileup.getBasesAsString();
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
char firstAllele = Genotype.values()[genotypeIndex].toString().charAt(0);

View File

@ -64,7 +64,7 @@ public class IVFSecondaryBases implements IndependentVariantFeature {
likelihoods = new double[10];
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
String primaryBases = pileup.getBases();
String primaryBases = pileup.getBasesAsString();
String secondaryBases = pileup.getSecondaryBasePileup();
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {

View File

@ -44,7 +44,7 @@ public class VECAlleleBalance extends RatioFilter {
if ( genotype.length() > 2 )
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
final String bases = pileup.getBases();
final String bases = pileup.getBasesAsString();
if ( bases.length() == 0 ) {
ratio = 0.0;
return new Pair<Integer, Integer>(0, 0);

View File

@ -39,7 +39,7 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
if ( genotype.length() > 2 )
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
final String bases = pileup.getBases();
final String bases = pileup.getBasesAsString();
if ( bases.length() == 0 ) {
ratio = 0.0;
return new Pair<Integer, Integer>(0, 0);

View File

@ -39,6 +39,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected double CONFIDENCE_THRESHOLD;
protected double MINIMUM_ALLELE_FREQUENCY;
protected double ALLELE_FREQUENCY_RANGE;
protected boolean REPORT_SLOD;
protected int maxDeletionsInPileup;
protected String assumedSingleSample;
protected PrintWriter verboseWriter;
@ -78,12 +79,16 @@ public abstract class GenotypeCalculationModel implements Cloneable {
if ( UAC.VERBOSE != null ) {
try {
verboseWriter = new PrintWriter(UAC.VERBOSE);
initializeVerboseWriter(verboseWriter);
} catch (FileNotFoundException e) {
throw new StingException("Could not open file " + UAC.VERBOSE + " for writing");
}
}
REPORT_SLOD = ! UAC.NO_SLOD;
}
protected void initializeVerboseWriter(PrintWriter writer) { };
public void close() {
if ( verboseWriter != null )
verboseWriter.close();

View File

@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*;
import java.io.PrintStream;
import java.io.PrintWriter;
public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {
@ -61,6 +63,17 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints);
}
protected void initializeVerboseWriter(PrintWriter verboseWriter) {
// by default, no initialization is done
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
for ( char altAllele : BaseUtils.BASES ) {
char base = Character.toLowerCase(altAllele);
header.append("POfDGivenAFFor" + base + "\t");
header.append("PosteriorAFFor" + base + "\t");
}
verboseWriter.println(header);
}
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
// by default, no initialization is done
return;
@ -175,24 +188,18 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
}
protected void printAlleleFrequencyData(char ref, GenomeLoc loc, int frequencyEstimationPoints) {
verboseWriter.println("Location=" + loc + ", ref=" + ref);
StringBuilder header = new StringBuilder("MAF\tNullAFpriors\t");
for ( char altAllele : BaseUtils.BASES ) {
if ( altAllele != ref ) {
char base = Character.toLowerCase(altAllele);
header.append("P(D|AF)_" + base + "\t");
header.append("PosteriorAF_" + base + "\t");
}
}
verboseWriter.println(header);
for (int i = 0; i < frequencyEstimationPoints; i++) {
StringBuilder AFline = new StringBuilder(i + "/" + (frequencyEstimationPoints-1) + "\t" + String.format("%.8f", log10AlleleFrequencyPriors[i]) + "\t");
StringBuilder AFline = new StringBuilder("AFINFO\t");
AFline.append(loc).append("\t");
AFline.append(i + "/" + (frequencyEstimationPoints-1) + "\t");
AFline.append(String.format("%.2f\t", ((float)i)/ (frequencyEstimationPoints-1)));
AFline.append(String.format("%.8f", log10AlleleFrequencyPriors[i]) + "\t");
for ( char altAllele : BaseUtils.BASES ) {
if ( altAllele != ref ) {
int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
AFline.append(String.format("%.8f\t%.8f\t", log10PofDgivenAFi[baseIndex][i], alleleFrequencyPosteriors[baseIndex][i]));
} else {
AFline.append(String.format("%.8f\t%.8f\t", -1.0, -1.0));
}
}
verboseWriter.println(AFline);
@ -258,7 +265,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
if ( dbsnp != null )
((IDBacked)locusdata).setID(dbsnp.getRS_ID());
}
if ( locusdata instanceof SLODBacked ) {
if ( locusdata instanceof SLODBacked && REPORT_SLOD ) {
// the overall lod
double overallLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
double overallLog10PofF = Math.log10(PofFs[indexOfMax]);

View File

@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.*;
@ -47,13 +50,49 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
return contexts;
}
protected double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
protected double computeLog10PofDgivenAFi(char refArg, char altArg, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
ReadBackedPileup pileup = new ReadBackedPileup(refArg, context.getContext(contextType));
System.out.printf("%s %.2f %d%n", alt, f, pileup.getBases().length());
// TODO -- IMPLEMENT ME
double log10L = 0.0;
return -100.0;
int refIndex = BaseUtils.simpleBaseToBaseIndex(refArg);
int altIndex = BaseUtils.simpleBaseToBaseIndex(altArg);
int nChromosomes = 2 * getNSamples(contexts);
int nAltAlleles = (int)(f * nChromosomes);
int nRefAlleles = nChromosomes - nAltAlleles;
double log10POfRef = Math.log10(1 - f);
double log10POfAlt = Math.log10(f);
//double log10ChromChooseRef = Math.log10(Arithmetic.binomial(nChromosomes, nRefAlleles));
//double log10ChromChooseAlt = Math.log10(Arithmetic.binomial(nChromosomes, nAltAlleles));
byte[] bases = pileup.getBases();
byte[] quals = pileup.getQuals();
for ( int i = 0; i < bases.length; i++ ) {
int bIndex = BaseUtils.simpleBaseToBaseIndex((char)bases[i]);
byte qual = quals[i];
if ( qual > 0 && bIndex != -1 ) {
double log10POfB = Math.log10(QualityUtils.qualToProb(qual));
double log10POfNotB = Math.log10(QualityUtils.qualToErrorProb(qual));
//System.out.printf("%f %f %f %d%n", log10L, log10POfB, log10POfNotB, qual);
if ( bIndex == refIndex && nRefAlleles > 0 ) {
log10L += log10POfRef + log10POfB;
} else if ( bIndex == altIndex && nAltAlleles > 0) {
log10L += log10POfAlt + log10POfB;
} else {
log10L += log10POfNotB;
}
}
}
verboseWriter.printf("POOL_DEBUG %s %c %c %d %d %d %.2f %.2f %.2f %f%n",
context.getContext(StratifiedContext.OVERALL).getLocation(),
refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L);
return log10L;
}
}

View File

@ -44,6 +44,8 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "poolSize", shortName = "ps", doc = "Number of individuals in the pool (for POOLED model only)", required = false)
public int POOLSIZE = 0;
@Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false)
public boolean NO_SLOD = false;
// control the output
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false)

View File

@ -334,13 +334,13 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
int refM = 0;
switch (Character.toUpperCase(p.getRef())) {
case 'A': refM = BasicPileup.countBases(p.getBases()).a;
case 'A': refM = BasicPileup.countBases(p.getBasesAsString()).a;
break;
case 'C': refM = BasicPileup.countBases(p.getBases()).c;
case 'C': refM = BasicPileup.countBases(p.getBasesAsString()).c;
break;
case 'G': refM = BasicPileup.countBases(p.getBases()).g;
case 'G': refM = BasicPileup.countBases(p.getBasesAsString()).g;
break;
case 'T': refM = BasicPileup.countBases(p.getBases()).t;
case 'T': refM = BasicPileup.countBases(p.getBasesAsString()).t;
break;
default:
throw new StingException("Exception in countMismatches - this has been called for a non-reference base. Base character from pileup was " + p.getRef() );
@ -362,7 +362,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
}
public boolean pileupContainsNoNs(ReadBackedPileup pileup) {
for ( char c : pileup.getBases().toCharArray() ) {
for ( byte c : pileup.getBases() ) {
if ( c == 'N' ) {
return false;
}

View File

@ -72,7 +72,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
for (byte base : pileup.getBases().getBytes()) {
for (byte base : pileup.getBases() ) {
int baseIndex = BaseUtils.simpleBaseToBaseIndex((char) base);
if (baseIndex != refIndex) {

View File

@ -26,7 +26,7 @@ abstract public class BasicPileup implements Pileup {
public String getPileupString()
{
return String.format("%s: %s %s %s", getLocation(), getRef(), getBases(), getQuals());
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
}
public static List<Integer> constantOffset( List<SAMRecord> reads, int i ) {
@ -43,7 +43,7 @@ abstract public class BasicPileup implements Pileup {
public static String basePileupAsString( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
StringBuilder bases = new StringBuilder();
for ( byte base : basePileup(reads, offsets, includeDeletions)) {
for ( byte base : getBasesAsArrayList(reads, offsets, includeDeletions)) {
bases.append((char)base);
}
return bases.toString();
@ -81,11 +81,33 @@ abstract public class BasicPileup implements Pileup {
return bases.toString();
}
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets ) {
return basePileup( reads, offsets, false );
//
// byte[] methods
//
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) {
return ArrayList2byte(getBasesAsArrayList( reads, offsets ));
}
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
return ArrayList2byte(getBasesAsArrayList( reads, offsets, includeDeletions ));
}
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) {
return ArrayList2byte(getQualsAsArrayList( reads, offsets ));
}
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
return ArrayList2byte(getQualsAsArrayList( reads, offsets, includeDeletions ));
}
//
// ArrayList<Byte> methods
//
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
return getBasesAsArrayList( reads, offsets, false );
}
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i);
@ -100,16 +122,24 @@ abstract public class BasicPileup implements Pileup {
return bases;
}
public static ArrayList<Byte> qualPileup( List<SAMRecord> reads, List<Integer> offsets ) {
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
return getQualsAsArrayList( reads, offsets, false );
}
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i);
int offset = offsets.get(i);
// skip deletion sites
if ( offset == -1 )
continue;
byte qual = (byte)read.getBaseQualities()[offset];
quals.add(qual);
if ( offset == -1 ) {
if ( includeDeletions ) // we need the qual vector to be the same length as base vector!
quals.add((byte)0);
} else {
byte qual = read.getBaseQualities()[offset];
quals.add(qual);
}
}
return quals;
}
@ -128,6 +158,14 @@ abstract public class BasicPileup implements Pileup {
return quals2String(mappingQualPileup(reads));
}
private static byte[] ArrayList2byte(ArrayList<Byte> ab) {
byte[] ret = new byte[ab.size()];
int i = 0;
for (byte e : ab)
ret[i++] = e;
return ret;
}
public static String quals2String( List<Byte> quals ) {
StringBuilder qualStr = new StringBuilder();
for ( int qual : quals ) {
@ -140,10 +178,11 @@ abstract public class BasicPileup implements Pileup {
}
public static String qualPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
return quals2String(qualPileup(reads, offsets));
return quals2String(getQualsAsArrayList(reads, offsets));
}
public static ArrayList<Byte> secondaryBasePileup( List<SAMRecord> reads, List<Integer> offsets ) {
// todo -- there are probably bugs in here due to includeDeletion flags
public static ArrayList<Byte> getSecondaryBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
ArrayList<Byte> bases2 = new ArrayList<Byte>(reads.size());
boolean hasAtLeastOneSQorE2Field = false;
@ -184,13 +223,13 @@ abstract public class BasicPileup implements Pileup {
return (hasAtLeastOneSQorE2Field ? bases2 : null);
}
public static String secondaryBasePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
public static String getSecondaryBasePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
StringBuilder bases2 = new StringBuilder();
ArrayList<Byte> sbases = secondaryBasePileup(reads, offsets);
ArrayList<Byte> sbases = getSecondaryBasesAsArrayList(reads, offsets);
if (sbases == null) { return null; }
ArrayList<Byte> pbases = basePileup(reads, offsets,true);
ArrayList<Byte> pbases = getBasesAsArrayList(reads, offsets,true);
Random generator = new Random();
@ -391,13 +430,13 @@ abstract public class BasicPileup implements Pileup {
if ( a.getLocation().compareTo(b.getLocation()) != 0 )
return "Locations not equal";
String aBases = maybeSorted(a.getBases(), ! orderDependent );
String bBases = maybeSorted(b.getBases(), ! orderDependent );
String aBases = maybeSorted(a.getBasesAsString(), ! orderDependent );
String bBases = maybeSorted(b.getBasesAsString(), ! orderDependent );
if ( ! aBases.toUpperCase().equals(bBases.toUpperCase()) )
return "Bases not equal";
String aQuals = maybeSorted(a.getQuals(), ! orderDependent );
String bQuals = maybeSorted(b.getQuals(), ! orderDependent );
String aQuals = maybeSorted(a.getQualsAsString(), ! orderDependent );
String bQuals = maybeSorted(b.getQualsAsString(), ! orderDependent );
if ( ! aQuals.equals(bQuals) )
return "Quals not equal";

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.utils;
import java.util.ArrayList;
/**
* Created by IntelliJ IDEA.
* User: depristo
@ -10,8 +12,16 @@ package org.broadinstitute.sting.utils;
public interface Pileup {
GenomeLoc getLocation();
char getRef();
String getBases();
String getQuals();
// byte array
byte[] getBases();
byte[] getQuals();
ArrayList<Byte> getBasesAsArrayList();
ArrayList<Byte> getQualsAsArrayList();
String getBasesAsString();
String getQualsAsString();
String getPileupString();
int size();
}

View File

@ -4,6 +4,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
/**
* Created by IntelliJ IDEA.
@ -33,7 +35,32 @@ public class ReadBackedPileup extends BasicPileup {
this.offsets = offsets;
}
public ReadBackedPileup getPileupWithoutDeletions() {
if ( getNumberOfDeletions() > 0 ) {
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
List<Integer> newOffsets = new ArrayList<Integer>();
for ( int i = 0; i < size(); i++ ) {
if ( getOffsets().get(i) != -1 ) {
newReads.add(getReads().get(i));
newOffsets.add(getOffsets().get(i));
}
}
return new ReadBackedPileup(loc, ref, newReads, newOffsets);
} else {
return this;
}
}
public int getNumberOfDeletions() {
int n = 0;
for ( int i = 0; i < size(); i++ ) {
if ( getOffsets().get(i) != -1 ) { n++; }
}
return n;
}
public int size() { return reads.size(); }
public List<SAMRecord> getReads() { return reads; }
@ -47,7 +74,16 @@ public class ReadBackedPileup extends BasicPileup {
return ref;
}
public String getBases() {
public byte[] getBases() {
return getBases(reads, offsets, includeDeletions);
}
public byte[] getQuals() {
return getQuals(reads, offsets, includeDeletions);
}
public String getBasesAsString() {
return basePileupAsString(reads, offsets, includeDeletions);
}
@ -55,13 +91,21 @@ public class ReadBackedPileup extends BasicPileup {
return baseWithStrandPileupAsString(reads, offsets, includeDeletions);
}
public String getQuals() {
public ArrayList<Byte> getBasesAsArrayList() {
return getBasesAsArrayList(reads, offsets, includeDeletions);
}
public ArrayList<Byte> getQualsAsArrayList() {
return getQualsAsArrayList(reads, offsets, includeDeletions);
}
public String getQualsAsString() {
return qualPileupAsString(reads, offsets);
}
public String getQualsAsInts() {
//System.out.printf("getQualsAsInts");
return Utils.join(",", qualPileup(reads, offsets));
return Utils.join(",", getQualsAsArrayList(reads, offsets));
}
public String getMappingQualsAsInts() {
@ -73,7 +117,7 @@ public class ReadBackedPileup extends BasicPileup {
}
public String getSecondaryBasePileup() {
return secondaryBasePileupAsString(reads, offsets);
return getSecondaryBasePileupAsString(reads, offsets);
}
public String getSecondaryQualPileup() {
@ -121,8 +165,8 @@ public class ReadBackedPileup extends BasicPileup {
return String.format("%s %s %s %s %s %s",
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
getRef(), // reference base
getBases(),
qualsAsInts ? getQualsAsInts() : getQuals(),
getBasesAsString(),
qualsAsInts ? getQualsAsInts() : getQualsAsString(),
qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() );
}
@ -131,7 +175,7 @@ public class ReadBackedPileup extends BasicPileup {
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
getRef(), // reference base
getBasesWithStrand(),
qualsAsInts ? getQualsAsInts() : getQuals(),
qualsAsInts ? getQualsAsInts() : getQualsAsString(),
qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() );
}
}

View File

@ -178,8 +178,8 @@ public class DupUtils {
private static Pair<Byte, Byte> combineBaseProbs(List<SAMRecord> duplicates, int readOffset, int maxQScore) {
List<Integer> offsets = BasicPileup.constantOffset(duplicates, readOffset);
ArrayList<Byte> bases = BasicPileup.basePileup(duplicates, offsets);
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
ArrayList<Byte> bases = BasicPileup.getBasesAsArrayList(duplicates, offsets);
ArrayList<Byte> quals = BasicPileup.getQualsAsArrayList(duplicates, offsets);
final boolean debug = false;
// calculate base probs
@ -205,59 +205,4 @@ public class DupUtils {
(char)(byte)combined.getFirst(), combined.getSecond());
return combined;
}
private static Pair<Byte, Byte> combineDupBasesAtI(List<SAMRecord> duplicates, int readOffset) {
//System.out.printf("combineDupBasesAtI() dups=%s, readOffset=%d%n", duplicates.size(), readOffset);
List<Integer> offsets = BasicPileup.constantOffset(duplicates, readOffset);
ArrayList<Byte> bases = BasicPileup.basePileup(duplicates, offsets);
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
// start the recursion
byte combinedBase = bases.get(0);
int combinedQual = quals.get(0);
for ( int i = 1; i < bases.size(); i++ ) {
// recursively combine evidence
byte nextBase = bases.get(i);
byte nextQual = quals.get(i);
Pair<Byte, Integer> combinedI = combine2BasesAndQuals(combinedBase, nextBase, combinedQual, nextQual);
if ( false )
System.out.printf("Combining at %d: %s (Q%2d) with %s (Q%2d) -> %s (Q%2d)%s%n",
i,
(char)combinedBase, combinedQual,
(char)nextBase, nextQual,
(char)((byte)combinedI.getFirst()), combinedI.getSecond(),
combinedBase == nextBase ? "" : " [MISMATCH]");
combinedBase = combinedI.getFirst();
combinedQual = combinedI.getSecond();
}
if ( false )
System.out.printf("%s %s => %c Q%s => Q%d%n",
BasicPileup.basePileupAsString(duplicates, offsets),
BasicPileup.qualPileupAsString(duplicates, offsets),
(char)combinedBase, combinedQual, QualityUtils.boundQual(combinedQual));
return new Pair<Byte, Byte>(combinedBase, QualityUtils.boundQual(combinedQual));
}
private static Pair<Byte, Byte> combineBasesByConsensus(List<SAMRecord> duplicates, int readOffset) {
//System.out.printf("combineDupBasesAtI() dups=%s, readOffset=%d%n", duplicates.size(), readOffset);
List<Integer> offsets = BasicPileup.constantOffset(duplicates, readOffset);
String bases = BasicPileup.basePileupAsString(duplicates, offsets);
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
byte combinedBase = BasicPileup.consensusBase(bases);
int baseMatches = Utils.countOccurrences((char)combinedBase, bases);
double maxP = QualityUtils.qualToProb(Utils.listMaxByte(quals));
double mismatchRate = (double)baseMatches / bases.length();
byte combinedQual = QualityUtils.probToQual(Math.min(maxP, mismatchRate), 0.0);
if ( false )
System.out.printf("%s %s => %c Q%s => Q%d%n",
BasicPileup.basePileupAsString(duplicates, offsets),
BasicPileup.qualPileupAsString(duplicates, offsets),
(char)combinedBase, combinedQual, QualityUtils.boundQual(combinedQual));
return new Pair<Byte, Byte>(combinedBase, QualityUtils.boundQual(combinedQual));
}
}

View File

@ -16,7 +16,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest {
@Test
public void secondBaseSkewTest() {
for ( int test = 1; test <= 5; test ++ ) {
for ( int test = 1; test <= 1; test ++ ) {
String bamFilePath = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.recal.annotation_subset.bam";
String callFile = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.ssg1b.geli.calls";
String args = VariantAnnotatorIntegrationTest.secondBaseTestString()+" -I "+bamFilePath+" -B variant,Variants,"+callFile+" "+VariantAnnotatorIntegrationTest.secondBaseTestInterval(test)+" -sample variant";