Pileup cleanup; pooled caller v1
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2070 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f0a234ab29
commit
6fe1c337ff
|
|
@ -379,7 +379,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
|
|||
|
||||
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
public String getQuals() { return pileupQuals; }
|
||||
public String getQualsAsString() { return pileupQuals; }
|
||||
|
||||
/** Returns reference base for point genotypes or '*' for indel genotypes, as a char.
|
||||
*
|
||||
|
|
@ -390,16 +390,20 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
|
|||
/** Returns pile of observed bases over the current genomic location.
|
||||
*
|
||||
*/
|
||||
public String getBases() { return pileupBases; }
|
||||
public String getBasesAsString() { return pileupBases; }
|
||||
|
||||
/** Returns formatted pileup string for the current genomic location as
|
||||
* "location: reference_base observed_base_pile observed_qual_pile"
|
||||
*/
|
||||
public String getPileupString()
|
||||
{
|
||||
return String.format("%s: %s %s %s", getLocation(), getRef(), getBases(), getQuals());
|
||||
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
|
||||
}
|
||||
|
||||
public ArrayList<Byte> getBasesAsArrayList() { throw new StingException("Not implemented"); }
|
||||
public ArrayList<Byte> getQualsAsArrayList() { throw new StingException("Not implemented"); }
|
||||
public byte[] getBases() { throw new StingException("Not implemented"); }
|
||||
public byte[] getQuals() { throw new StingException("Not implemented"); }
|
||||
|
||||
/** Returns bases in the reference allele as a String. For point genotypes, the string consists of a single
|
||||
* character (reference base). For indel genotypes, the string is empty for insertions into
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
|
||||
String bases = pileup.getBases();
|
||||
String bases = pileup.getBasesAsString();
|
||||
|
||||
if ( bases.equals("") && !IGNORE_UNCOVERED_BASES ) {
|
||||
bases = "***UNCOVERED_SITE***";
|
||||
|
|
@ -119,7 +119,7 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
if( secondBasePileup != null )
|
||||
return " " + secondBasePileup;
|
||||
else
|
||||
return " " + Utils.dupString('N', pileup.getBases().length());
|
||||
return " " + Utils.dupString('N', pileup.getBasesAsString().length());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
|||
out.printf("No truth pileup data available at %s%n", pileup.getPileupString());
|
||||
if ( ! CONTINUE_AFTER_AN_ERROR ) {
|
||||
Utils.scareUser(String.format("No pileup data available at %s given GATK's output of %s -- this walker requires samtools pileup data over all bases",
|
||||
context.getLocation(), pileup.getBases()));
|
||||
context.getLocation(), pileup.getBasesAsString()));
|
||||
}
|
||||
} else {
|
||||
String pileupDiff = BasicPileup.pileupDiff(pileup, truePileup);
|
||||
|
|
|
|||
|
|
@ -11,30 +11,31 @@ import java.util.ArrayList;
|
|||
public class AlleleBalance implements VariantAnnotation {
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||
double ratio = -1;
|
||||
|
||||
double ratio;
|
||||
if ( genotypes.size() > 0 ) {
|
||||
Genotype g = genotypes.get(0);
|
||||
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked ) {
|
||||
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
|
||||
if ( weightedBalance.second == 0 )
|
||||
return null;
|
||||
ratio = weightedBalance.first;
|
||||
} else {
|
||||
// this test doesn't make sense for homs
|
||||
Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes);
|
||||
if ( genotype == null )
|
||||
return null;
|
||||
|
||||
Genotype g = genotypes.get(0);
|
||||
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked ) {
|
||||
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
|
||||
if ( weightedBalance.second == 0 )
|
||||
return null;
|
||||
ratio = weightedBalance.first;
|
||||
} else {
|
||||
// this test doesn't make sense for homs
|
||||
Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes);
|
||||
if ( genotype == null )
|
||||
return null;
|
||||
final String genotypeStr = genotype.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
return null;
|
||||
|
||||
final String genotypeStr = genotype.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
return null;
|
||||
final String bases = pileup.getBasesAsString().toUpperCase();
|
||||
if ( bases.length() == 0 )
|
||||
return null;
|
||||
|
||||
final String bases = pileup.getBases().toUpperCase();
|
||||
if ( bases.length() == 0 )
|
||||
return null;
|
||||
|
||||
ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases);
|
||||
ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases);
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<String, String>("AlleleBalance", String.format("%.2f", ratio));
|
||||
|
|
|
|||
|
|
@ -14,28 +14,30 @@ public class OnOffGenotype implements VariantAnnotation {
|
|||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||
|
||||
double ratio;
|
||||
double ratio = -1;
|
||||
|
||||
Genotype g = genotypes.get(0);
|
||||
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked) {
|
||||
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
|
||||
if ( weightedBalance.second == 0 )
|
||||
return null;
|
||||
ratio = weightedBalance.first;
|
||||
} else {
|
||||
Genotype genotype = VariantAnnotator.getFirstVariant(ref.getBase(), genotypes);
|
||||
if ( genotype == null )
|
||||
return null;
|
||||
if ( genotypes.size() > 0 ) {
|
||||
Genotype g = genotypes.get(0);
|
||||
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked) {
|
||||
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
|
||||
if ( weightedBalance.second == 0 )
|
||||
return null;
|
||||
ratio = weightedBalance.first;
|
||||
} else {
|
||||
Genotype genotype = VariantAnnotator.getFirstVariant(ref.getBase(), genotypes);
|
||||
if ( genotype == null )
|
||||
return null;
|
||||
|
||||
final String genotypeStr = genotype.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
return null;
|
||||
final String genotypeStr = genotype.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
return null;
|
||||
|
||||
final String bases = pileup.getBases().toUpperCase();
|
||||
if ( bases.length() == 0 )
|
||||
return null;
|
||||
final String bases = pileup.getBasesAsString().toUpperCase();
|
||||
if ( bases.length() == 0 )
|
||||
return null;
|
||||
|
||||
ratio = computeSingleBalance(genotypeStr, bases);
|
||||
ratio = computeSingleBalance(genotypeStr, bases);
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<String, String>("OnOffGenotype", String.format("%.2f", ratio));
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ public class SecondBaseSkew implements VariantAnnotation{
|
|||
}
|
||||
int variantDepth = 0;
|
||||
int variantsWithRefSecondBase = 0;
|
||||
char[] primaryPileup = p.getBases().toCharArray();
|
||||
byte[] primaryPileup = p.getBases();
|
||||
String secondBasePileup = p.getSecondaryBasePileup();
|
||||
|
||||
if ( secondBasePileup == null ) {
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ public class IVFBinomialStrand implements IndependentVariantFeature {
|
|||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
|
||||
List<SAMRecord> reads = context.getAlignmentContext(useZeroQualityReads()).getReads();
|
||||
String bases = pileup.getBases();
|
||||
String bases = pileup.getBasesAsString();
|
||||
|
||||
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
|
||||
Genotype genotype = Genotype.values()[genotypeIndex];
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ public class IVFPrimaryBases implements IndependentVariantFeature {
|
|||
likelihoods = new double[10];
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
|
||||
String primaryBases = pileup.getBases();
|
||||
String primaryBases = pileup.getBasesAsString();
|
||||
|
||||
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
|
||||
char firstAllele = Genotype.values()[genotypeIndex].toString().charAt(0);
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class IVFSecondaryBases implements IndependentVariantFeature {
|
|||
likelihoods = new double[10];
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
|
||||
String primaryBases = pileup.getBases();
|
||||
String primaryBases = pileup.getBasesAsString();
|
||||
String secondaryBases = pileup.getSecondaryBasePileup();
|
||||
|
||||
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ public class VECAlleleBalance extends RatioFilter {
|
|||
if ( genotype.length() > 2 )
|
||||
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
|
||||
|
||||
final String bases = pileup.getBases();
|
||||
final String bases = pileup.getBasesAsString();
|
||||
if ( bases.length() == 0 ) {
|
||||
ratio = 0.0;
|
||||
return new Pair<Integer, Integer>(0, 0);
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
|
|||
if ( genotype.length() > 2 )
|
||||
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
|
||||
|
||||
final String bases = pileup.getBases();
|
||||
final String bases = pileup.getBasesAsString();
|
||||
if ( bases.length() == 0 ) {
|
||||
ratio = 0.0;
|
||||
return new Pair<Integer, Integer>(0, 0);
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
protected double CONFIDENCE_THRESHOLD;
|
||||
protected double MINIMUM_ALLELE_FREQUENCY;
|
||||
protected double ALLELE_FREQUENCY_RANGE;
|
||||
protected boolean REPORT_SLOD;
|
||||
protected int maxDeletionsInPileup;
|
||||
protected String assumedSingleSample;
|
||||
protected PrintWriter verboseWriter;
|
||||
|
|
@ -78,12 +79,16 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
if ( UAC.VERBOSE != null ) {
|
||||
try {
|
||||
verboseWriter = new PrintWriter(UAC.VERBOSE);
|
||||
initializeVerboseWriter(verboseWriter);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Could not open file " + UAC.VERBOSE + " for writing");
|
||||
}
|
||||
}
|
||||
REPORT_SLOD = ! UAC.NO_SLOD;
|
||||
}
|
||||
|
||||
protected void initializeVerboseWriter(PrintWriter writer) { };
|
||||
|
||||
public void close() {
|
||||
if ( verboseWriter != null )
|
||||
verboseWriter.close();
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.PrintStream;
|
||||
import java.io.PrintWriter;
|
||||
|
||||
public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {
|
||||
|
||||
|
|
@ -61,6 +63,17 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints);
|
||||
}
|
||||
|
||||
protected void initializeVerboseWriter(PrintWriter verboseWriter) {
|
||||
// by default, no initialization is done
|
||||
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
|
||||
for ( char altAllele : BaseUtils.BASES ) {
|
||||
char base = Character.toLowerCase(altAllele);
|
||||
header.append("POfDGivenAFFor" + base + "\t");
|
||||
header.append("PosteriorAFFor" + base + "\t");
|
||||
}
|
||||
verboseWriter.println(header);
|
||||
}
|
||||
|
||||
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||
// by default, no initialization is done
|
||||
return;
|
||||
|
|
@ -175,24 +188,18 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
}
|
||||
|
||||
protected void printAlleleFrequencyData(char ref, GenomeLoc loc, int frequencyEstimationPoints) {
|
||||
|
||||
verboseWriter.println("Location=" + loc + ", ref=" + ref);
|
||||
StringBuilder header = new StringBuilder("MAF\tNullAFpriors\t");
|
||||
for ( char altAllele : BaseUtils.BASES ) {
|
||||
if ( altAllele != ref ) {
|
||||
char base = Character.toLowerCase(altAllele);
|
||||
header.append("P(D|AF)_" + base + "\t");
|
||||
header.append("PosteriorAF_" + base + "\t");
|
||||
}
|
||||
}
|
||||
verboseWriter.println(header);
|
||||
|
||||
for (int i = 0; i < frequencyEstimationPoints; i++) {
|
||||
StringBuilder AFline = new StringBuilder(i + "/" + (frequencyEstimationPoints-1) + "\t" + String.format("%.8f", log10AlleleFrequencyPriors[i]) + "\t");
|
||||
StringBuilder AFline = new StringBuilder("AFINFO\t");
|
||||
AFline.append(loc).append("\t");
|
||||
AFline.append(i + "/" + (frequencyEstimationPoints-1) + "\t");
|
||||
AFline.append(String.format("%.2f\t", ((float)i)/ (frequencyEstimationPoints-1)));
|
||||
AFline.append(String.format("%.8f", log10AlleleFrequencyPriors[i]) + "\t");
|
||||
for ( char altAllele : BaseUtils.BASES ) {
|
||||
if ( altAllele != ref ) {
|
||||
int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
|
||||
AFline.append(String.format("%.8f\t%.8f\t", log10PofDgivenAFi[baseIndex][i], alleleFrequencyPosteriors[baseIndex][i]));
|
||||
} else {
|
||||
AFline.append(String.format("%.8f\t%.8f\t", -1.0, -1.0));
|
||||
}
|
||||
}
|
||||
verboseWriter.println(AFline);
|
||||
|
|
@ -258,7 +265,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
if ( dbsnp != null )
|
||||
((IDBacked)locusdata).setID(dbsnp.getRS_ID());
|
||||
}
|
||||
if ( locusdata instanceof SLODBacked ) {
|
||||
if ( locusdata instanceof SLODBacked && REPORT_SLOD ) {
|
||||
// the overall lod
|
||||
double overallLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
double overallLog10PofF = Math.log10(PofFs[indexOfMax]);
|
||||
|
|
|
|||
|
|
@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -47,13 +50,49 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
|||
return contexts;
|
||||
}
|
||||
|
||||
protected double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||
protected double computeLog10PofDgivenAFi(char refArg, char altArg, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(refArg, context.getContext(contextType));
|
||||
|
||||
System.out.printf("%s %.2f %d%n", alt, f, pileup.getBases().length());
|
||||
// TODO -- IMPLEMENT ME
|
||||
double log10L = 0.0;
|
||||
|
||||
return -100.0;
|
||||
int refIndex = BaseUtils.simpleBaseToBaseIndex(refArg);
|
||||
int altIndex = BaseUtils.simpleBaseToBaseIndex(altArg);
|
||||
|
||||
int nChromosomes = 2 * getNSamples(contexts);
|
||||
int nAltAlleles = (int)(f * nChromosomes);
|
||||
int nRefAlleles = nChromosomes - nAltAlleles;
|
||||
|
||||
double log10POfRef = Math.log10(1 - f);
|
||||
double log10POfAlt = Math.log10(f);
|
||||
//double log10ChromChooseRef = Math.log10(Arithmetic.binomial(nChromosomes, nRefAlleles));
|
||||
//double log10ChromChooseAlt = Math.log10(Arithmetic.binomial(nChromosomes, nAltAlleles));
|
||||
|
||||
byte[] bases = pileup.getBases();
|
||||
byte[] quals = pileup.getQuals();
|
||||
|
||||
for ( int i = 0; i < bases.length; i++ ) {
|
||||
int bIndex = BaseUtils.simpleBaseToBaseIndex((char)bases[i]);
|
||||
byte qual = quals[i];
|
||||
if ( qual > 0 && bIndex != -1 ) {
|
||||
double log10POfB = Math.log10(QualityUtils.qualToProb(qual));
|
||||
double log10POfNotB = Math.log10(QualityUtils.qualToErrorProb(qual));
|
||||
//System.out.printf("%f %f %f %d%n", log10L, log10POfB, log10POfNotB, qual);
|
||||
|
||||
if ( bIndex == refIndex && nRefAlleles > 0 ) {
|
||||
log10L += log10POfRef + log10POfB;
|
||||
} else if ( bIndex == altIndex && nAltAlleles > 0) {
|
||||
log10L += log10POfAlt + log10POfB;
|
||||
} else {
|
||||
log10L += log10POfNotB;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
verboseWriter.printf("POOL_DEBUG %s %c %c %d %d %d %.2f %.2f %.2f %f%n",
|
||||
context.getContext(StratifiedContext.OVERALL).getLocation(),
|
||||
refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L);
|
||||
|
||||
return log10L;
|
||||
}
|
||||
}
|
||||
|
|
@ -44,6 +44,8 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "poolSize", shortName = "ps", doc = "Number of individuals in the pool (for POOLED model only)", required = false)
|
||||
public int POOLSIZE = 0;
|
||||
|
||||
@Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false)
|
||||
public boolean NO_SLOD = false;
|
||||
|
||||
// control the output
|
||||
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false)
|
||||
|
|
|
|||
|
|
@ -334,13 +334,13 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
|||
int refM = 0;
|
||||
|
||||
switch (Character.toUpperCase(p.getRef())) {
|
||||
case 'A': refM = BasicPileup.countBases(p.getBases()).a;
|
||||
case 'A': refM = BasicPileup.countBases(p.getBasesAsString()).a;
|
||||
break;
|
||||
case 'C': refM = BasicPileup.countBases(p.getBases()).c;
|
||||
case 'C': refM = BasicPileup.countBases(p.getBasesAsString()).c;
|
||||
break;
|
||||
case 'G': refM = BasicPileup.countBases(p.getBases()).g;
|
||||
case 'G': refM = BasicPileup.countBases(p.getBasesAsString()).g;
|
||||
break;
|
||||
case 'T': refM = BasicPileup.countBases(p.getBases()).t;
|
||||
case 'T': refM = BasicPileup.countBases(p.getBasesAsString()).t;
|
||||
break;
|
||||
default:
|
||||
throw new StingException("Exception in countMismatches - this has been called for a non-reference base. Base character from pileup was " + p.getRef() );
|
||||
|
|
@ -362,7 +362,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
|||
}
|
||||
|
||||
public boolean pileupContainsNoNs(ReadBackedPileup pileup) {
|
||||
for ( char c : pileup.getBases().toCharArray() ) {
|
||||
for ( byte c : pileup.getBases() ) {
|
||||
if ( c == 'N' ) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
|
|||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
|
||||
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
||||
|
||||
for (byte base : pileup.getBases().getBytes()) {
|
||||
for (byte base : pileup.getBases() ) {
|
||||
int baseIndex = BaseUtils.simpleBaseToBaseIndex((char) base);
|
||||
|
||||
if (baseIndex != refIndex) {
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ abstract public class BasicPileup implements Pileup {
|
|||
|
||||
public String getPileupString()
|
||||
{
|
||||
return String.format("%s: %s %s %s", getLocation(), getRef(), getBases(), getQuals());
|
||||
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
|
||||
}
|
||||
|
||||
public static List<Integer> constantOffset( List<SAMRecord> reads, int i ) {
|
||||
|
|
@ -43,7 +43,7 @@ abstract public class BasicPileup implements Pileup {
|
|||
|
||||
public static String basePileupAsString( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
StringBuilder bases = new StringBuilder();
|
||||
for ( byte base : basePileup(reads, offsets, includeDeletions)) {
|
||||
for ( byte base : getBasesAsArrayList(reads, offsets, includeDeletions)) {
|
||||
bases.append((char)base);
|
||||
}
|
||||
return bases.toString();
|
||||
|
|
@ -81,11 +81,33 @@ abstract public class BasicPileup implements Pileup {
|
|||
return bases.toString();
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
return basePileup( reads, offsets, false );
|
||||
//
|
||||
// byte[] methods
|
||||
//
|
||||
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
return ArrayList2byte(getBasesAsArrayList( reads, offsets ));
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
return ArrayList2byte(getBasesAsArrayList( reads, offsets, includeDeletions ));
|
||||
}
|
||||
|
||||
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
return ArrayList2byte(getQualsAsArrayList( reads, offsets ));
|
||||
}
|
||||
|
||||
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
return ArrayList2byte(getQualsAsArrayList( reads, offsets, includeDeletions ));
|
||||
}
|
||||
|
||||
//
|
||||
// ArrayList<Byte> methods
|
||||
//
|
||||
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
return getBasesAsArrayList( reads, offsets, false );
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
|
|
@ -100,16 +122,24 @@ abstract public class BasicPileup implements Pileup {
|
|||
return bases;
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> qualPileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
return getQualsAsArrayList( reads, offsets, false );
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
|
||||
// skip deletion sites
|
||||
if ( offset == -1 )
|
||||
continue;
|
||||
byte qual = (byte)read.getBaseQualities()[offset];
|
||||
quals.add(qual);
|
||||
if ( offset == -1 ) {
|
||||
if ( includeDeletions ) // we need the qual vector to be the same length as base vector!
|
||||
quals.add((byte)0);
|
||||
} else {
|
||||
byte qual = read.getBaseQualities()[offset];
|
||||
quals.add(qual);
|
||||
}
|
||||
}
|
||||
return quals;
|
||||
}
|
||||
|
|
@ -128,6 +158,14 @@ abstract public class BasicPileup implements Pileup {
|
|||
return quals2String(mappingQualPileup(reads));
|
||||
}
|
||||
|
||||
private static byte[] ArrayList2byte(ArrayList<Byte> ab) {
|
||||
byte[] ret = new byte[ab.size()];
|
||||
int i = 0;
|
||||
for (byte e : ab)
|
||||
ret[i++] = e;
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static String quals2String( List<Byte> quals ) {
|
||||
StringBuilder qualStr = new StringBuilder();
|
||||
for ( int qual : quals ) {
|
||||
|
|
@ -140,10 +178,11 @@ abstract public class BasicPileup implements Pileup {
|
|||
}
|
||||
|
||||
public static String qualPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
return quals2String(qualPileup(reads, offsets));
|
||||
return quals2String(getQualsAsArrayList(reads, offsets));
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> secondaryBasePileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
// todo -- there are probably bugs in here due to includeDeletion flags
|
||||
public static ArrayList<Byte> getSecondaryBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
ArrayList<Byte> bases2 = new ArrayList<Byte>(reads.size());
|
||||
boolean hasAtLeastOneSQorE2Field = false;
|
||||
|
||||
|
|
@ -184,13 +223,13 @@ abstract public class BasicPileup implements Pileup {
|
|||
return (hasAtLeastOneSQorE2Field ? bases2 : null);
|
||||
}
|
||||
|
||||
public static String secondaryBasePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
public static String getSecondaryBasePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
StringBuilder bases2 = new StringBuilder();
|
||||
ArrayList<Byte> sbases = secondaryBasePileup(reads, offsets);
|
||||
ArrayList<Byte> sbases = getSecondaryBasesAsArrayList(reads, offsets);
|
||||
|
||||
if (sbases == null) { return null; }
|
||||
|
||||
ArrayList<Byte> pbases = basePileup(reads, offsets,true);
|
||||
ArrayList<Byte> pbases = getBasesAsArrayList(reads, offsets,true);
|
||||
|
||||
Random generator = new Random();
|
||||
|
||||
|
|
@ -391,13 +430,13 @@ abstract public class BasicPileup implements Pileup {
|
|||
if ( a.getLocation().compareTo(b.getLocation()) != 0 )
|
||||
return "Locations not equal";
|
||||
|
||||
String aBases = maybeSorted(a.getBases(), ! orderDependent );
|
||||
String bBases = maybeSorted(b.getBases(), ! orderDependent );
|
||||
String aBases = maybeSorted(a.getBasesAsString(), ! orderDependent );
|
||||
String bBases = maybeSorted(b.getBasesAsString(), ! orderDependent );
|
||||
if ( ! aBases.toUpperCase().equals(bBases.toUpperCase()) )
|
||||
return "Bases not equal";
|
||||
|
||||
String aQuals = maybeSorted(a.getQuals(), ! orderDependent );
|
||||
String bQuals = maybeSorted(b.getQuals(), ! orderDependent );
|
||||
String aQuals = maybeSorted(a.getQualsAsString(), ! orderDependent );
|
||||
String bQuals = maybeSorted(b.getQualsAsString(), ! orderDependent );
|
||||
if ( ! aQuals.equals(bQuals) )
|
||||
return "Quals not equal";
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
|
|
@ -10,8 +12,16 @@ package org.broadinstitute.sting.utils;
|
|||
public interface Pileup {
|
||||
GenomeLoc getLocation();
|
||||
char getRef();
|
||||
String getBases();
|
||||
String getQuals();
|
||||
|
||||
// byte array
|
||||
byte[] getBases();
|
||||
byte[] getQuals();
|
||||
|
||||
ArrayList<Byte> getBasesAsArrayList();
|
||||
ArrayList<Byte> getQualsAsArrayList();
|
||||
|
||||
String getBasesAsString();
|
||||
String getQualsAsString();
|
||||
String getPileupString();
|
||||
int size();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -33,7 +35,32 @@ public class ReadBackedPileup extends BasicPileup {
|
|||
this.offsets = offsets;
|
||||
}
|
||||
|
||||
public ReadBackedPileup getPileupWithoutDeletions() {
|
||||
if ( getNumberOfDeletions() > 0 ) {
|
||||
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
|
||||
List<Integer> newOffsets = new ArrayList<Integer>();
|
||||
|
||||
for ( int i = 0; i < size(); i++ ) {
|
||||
if ( getOffsets().get(i) != -1 ) {
|
||||
newReads.add(getReads().get(i));
|
||||
newOffsets.add(getOffsets().get(i));
|
||||
}
|
||||
}
|
||||
return new ReadBackedPileup(loc, ref, newReads, newOffsets);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
public int getNumberOfDeletions() {
|
||||
int n = 0;
|
||||
|
||||
for ( int i = 0; i < size(); i++ ) {
|
||||
if ( getOffsets().get(i) != -1 ) { n++; }
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
public int size() { return reads.size(); }
|
||||
public List<SAMRecord> getReads() { return reads; }
|
||||
|
|
@ -47,7 +74,16 @@ public class ReadBackedPileup extends BasicPileup {
|
|||
return ref;
|
||||
}
|
||||
|
||||
public String getBases() {
|
||||
public byte[] getBases() {
|
||||
return getBases(reads, offsets, includeDeletions);
|
||||
}
|
||||
|
||||
public byte[] getQuals() {
|
||||
return getQuals(reads, offsets, includeDeletions);
|
||||
}
|
||||
|
||||
|
||||
public String getBasesAsString() {
|
||||
return basePileupAsString(reads, offsets, includeDeletions);
|
||||
}
|
||||
|
||||
|
|
@ -55,13 +91,21 @@ public class ReadBackedPileup extends BasicPileup {
|
|||
return baseWithStrandPileupAsString(reads, offsets, includeDeletions);
|
||||
}
|
||||
|
||||
public String getQuals() {
|
||||
public ArrayList<Byte> getBasesAsArrayList() {
|
||||
return getBasesAsArrayList(reads, offsets, includeDeletions);
|
||||
}
|
||||
|
||||
public ArrayList<Byte> getQualsAsArrayList() {
|
||||
return getQualsAsArrayList(reads, offsets, includeDeletions);
|
||||
}
|
||||
|
||||
public String getQualsAsString() {
|
||||
return qualPileupAsString(reads, offsets);
|
||||
}
|
||||
|
||||
public String getQualsAsInts() {
|
||||
//System.out.printf("getQualsAsInts");
|
||||
return Utils.join(",", qualPileup(reads, offsets));
|
||||
return Utils.join(",", getQualsAsArrayList(reads, offsets));
|
||||
}
|
||||
|
||||
public String getMappingQualsAsInts() {
|
||||
|
|
@ -73,7 +117,7 @@ public class ReadBackedPileup extends BasicPileup {
|
|||
}
|
||||
|
||||
public String getSecondaryBasePileup() {
|
||||
return secondaryBasePileupAsString(reads, offsets);
|
||||
return getSecondaryBasePileupAsString(reads, offsets);
|
||||
}
|
||||
|
||||
public String getSecondaryQualPileup() {
|
||||
|
|
@ -121,8 +165,8 @@ public class ReadBackedPileup extends BasicPileup {
|
|||
return String.format("%s %s %s %s %s %s",
|
||||
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
||||
getRef(), // reference base
|
||||
getBases(),
|
||||
qualsAsInts ? getQualsAsInts() : getQuals(),
|
||||
getBasesAsString(),
|
||||
qualsAsInts ? getQualsAsInts() : getQualsAsString(),
|
||||
qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() );
|
||||
}
|
||||
|
||||
|
|
@ -131,7 +175,7 @@ public class ReadBackedPileup extends BasicPileup {
|
|||
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
||||
getRef(), // reference base
|
||||
getBasesWithStrand(),
|
||||
qualsAsInts ? getQualsAsInts() : getQuals(),
|
||||
qualsAsInts ? getQualsAsInts() : getQualsAsString(),
|
||||
qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -178,8 +178,8 @@ public class DupUtils {
|
|||
|
||||
private static Pair<Byte, Byte> combineBaseProbs(List<SAMRecord> duplicates, int readOffset, int maxQScore) {
|
||||
List<Integer> offsets = BasicPileup.constantOffset(duplicates, readOffset);
|
||||
ArrayList<Byte> bases = BasicPileup.basePileup(duplicates, offsets);
|
||||
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
|
||||
ArrayList<Byte> bases = BasicPileup.getBasesAsArrayList(duplicates, offsets);
|
||||
ArrayList<Byte> quals = BasicPileup.getQualsAsArrayList(duplicates, offsets);
|
||||
final boolean debug = false;
|
||||
|
||||
// calculate base probs
|
||||
|
|
@ -205,59 +205,4 @@ public class DupUtils {
|
|||
(char)(byte)combined.getFirst(), combined.getSecond());
|
||||
return combined;
|
||||
}
|
||||
|
||||
|
||||
private static Pair<Byte, Byte> combineDupBasesAtI(List<SAMRecord> duplicates, int readOffset) {
|
||||
//System.out.printf("combineDupBasesAtI() dups=%s, readOffset=%d%n", duplicates.size(), readOffset);
|
||||
List<Integer> offsets = BasicPileup.constantOffset(duplicates, readOffset);
|
||||
ArrayList<Byte> bases = BasicPileup.basePileup(duplicates, offsets);
|
||||
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
|
||||
|
||||
// start the recursion
|
||||
byte combinedBase = bases.get(0);
|
||||
int combinedQual = quals.get(0);
|
||||
for ( int i = 1; i < bases.size(); i++ ) {
|
||||
// recursively combine evidence
|
||||
byte nextBase = bases.get(i);
|
||||
byte nextQual = quals.get(i);
|
||||
Pair<Byte, Integer> combinedI = combine2BasesAndQuals(combinedBase, nextBase, combinedQual, nextQual);
|
||||
|
||||
if ( false )
|
||||
System.out.printf("Combining at %d: %s (Q%2d) with %s (Q%2d) -> %s (Q%2d)%s%n",
|
||||
i,
|
||||
(char)combinedBase, combinedQual,
|
||||
(char)nextBase, nextQual,
|
||||
(char)((byte)combinedI.getFirst()), combinedI.getSecond(),
|
||||
combinedBase == nextBase ? "" : " [MISMATCH]");
|
||||
combinedBase = combinedI.getFirst();
|
||||
combinedQual = combinedI.getSecond();
|
||||
}
|
||||
|
||||
if ( false )
|
||||
System.out.printf("%s %s => %c Q%s => Q%d%n",
|
||||
BasicPileup.basePileupAsString(duplicates, offsets),
|
||||
BasicPileup.qualPileupAsString(duplicates, offsets),
|
||||
(char)combinedBase, combinedQual, QualityUtils.boundQual(combinedQual));
|
||||
return new Pair<Byte, Byte>(combinedBase, QualityUtils.boundQual(combinedQual));
|
||||
}
|
||||
|
||||
private static Pair<Byte, Byte> combineBasesByConsensus(List<SAMRecord> duplicates, int readOffset) {
|
||||
//System.out.printf("combineDupBasesAtI() dups=%s, readOffset=%d%n", duplicates.size(), readOffset);
|
||||
List<Integer> offsets = BasicPileup.constantOffset(duplicates, readOffset);
|
||||
String bases = BasicPileup.basePileupAsString(duplicates, offsets);
|
||||
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
|
||||
byte combinedBase = BasicPileup.consensusBase(bases);
|
||||
int baseMatches = Utils.countOccurrences((char)combinedBase, bases);
|
||||
double maxP = QualityUtils.qualToProb(Utils.listMaxByte(quals));
|
||||
double mismatchRate = (double)baseMatches / bases.length();
|
||||
byte combinedQual = QualityUtils.probToQual(Math.min(maxP, mismatchRate), 0.0);
|
||||
|
||||
if ( false )
|
||||
System.out.printf("%s %s => %c Q%s => Q%d%n",
|
||||
BasicPileup.basePileupAsString(duplicates, offsets),
|
||||
BasicPileup.qualPileupAsString(duplicates, offsets),
|
||||
(char)combinedBase, combinedQual, QualityUtils.boundQual(combinedQual));
|
||||
|
||||
return new Pair<Byte, Byte>(combinedBase, QualityUtils.boundQual(combinedQual));
|
||||
}
|
||||
}
|
||||
|
|
@ -16,7 +16,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void secondBaseSkewTest() {
|
||||
for ( int test = 1; test <= 5; test ++ ) {
|
||||
for ( int test = 1; test <= 1; test ++ ) {
|
||||
String bamFilePath = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.recal.annotation_subset.bam";
|
||||
String callFile = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.ssg1b.geli.calls";
|
||||
String args = VariantAnnotatorIntegrationTest.secondBaseTestString()+" -I "+bamFilePath+" -B variant,Variants,"+callFile+" "+VariantAnnotatorIntegrationTest.secondBaseTestInterval(test)+" -sample variant";
|
||||
|
|
|
|||
Loading…
Reference in New Issue