Committing the fragment-based calling code. Results look great in all datasets (will show this at 1000G this week with Ryan). Note that this is an intermediate commit. The code needs to be cleaned up and the fragmentation code needs to be moved up into LocusIteratorByState. This should all happen later this week, but I don't want Ryan to have to keep running from my own personal Sting directory. The current crappy implementation adds ~10% to the runtime, but that should all go away in the next iteration.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5058 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bb6999b032
commit
2bbcc9275a
|
|
@ -25,7 +25,6 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.SAMUtils;
|
import net.sf.samtools.SAMUtils;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -34,6 +33,10 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import static java.lang.Math.log10;
|
import static java.lang.Math.log10;
|
||||||
import static java.lang.Math.pow;
|
import static java.lang.Math.pow;
|
||||||
|
|
||||||
|
|
@ -69,6 +72,8 @@ import static java.lang.Math.pow;
|
||||||
* model.
|
* model.
|
||||||
*/
|
*/
|
||||||
public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
|
public final static double DEFAULT_PCR_ERROR_RATE = 1e-4;
|
||||||
|
|
||||||
protected final static int FIXED_PLOIDY = 2;
|
protected final static int FIXED_PLOIDY = 2;
|
||||||
protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
|
protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
|
||||||
protected final static double ploidyAdjustment = log10(FIXED_PLOIDY);
|
protected final static double ploidyAdjustment = log10(FIXED_PLOIDY);
|
||||||
|
|
@ -84,22 +89,31 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
|
|
||||||
protected DiploidSNPGenotypePriors priors = null;
|
protected DiploidSNPGenotypePriors priors = null;
|
||||||
|
|
||||||
|
// TODO: don't calculate this each time through
|
||||||
|
protected double log10_PCR_error_3;
|
||||||
|
protected double log10_1_minus_PCR_error;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new GenotypeLikelhoods object with flat priors for each diploid genotype
|
* Create a new GenotypeLikelhoods object with flat priors for each diploid genotype
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public DiploidSNPGenotypeLikelihoods() {
|
public DiploidSNPGenotypeLikelihoods() {
|
||||||
this.priors = new DiploidSNPGenotypePriors();
|
this.priors = new DiploidSNPGenotypePriors();
|
||||||
|
log10_PCR_error_3 = log10(DEFAULT_PCR_ERROR_RATE) - log10_3;
|
||||||
|
log10_1_minus_PCR_error = log10(1.0 - DEFAULT_PCR_ERROR_RATE);
|
||||||
setToZero();
|
setToZero();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new GenotypeLikelhoods object with given priors for each diploid genotype
|
* Create a new GenotypeLikelhoods object with given priors and PCR error rate for each diploid genotype
|
||||||
*
|
*
|
||||||
* @param priors priors
|
* @param priors priors
|
||||||
|
* @param PCR_error_rate the PCR error rate
|
||||||
*/
|
*/
|
||||||
public DiploidSNPGenotypeLikelihoods(DiploidSNPGenotypePriors priors) {
|
public DiploidSNPGenotypeLikelihoods(DiploidSNPGenotypePriors priors, double PCR_error_rate) {
|
||||||
this.priors = priors;
|
this.priors = priors;
|
||||||
|
log10_PCR_error_3 = log10(PCR_error_rate) - log10_3;
|
||||||
|
log10_1_minus_PCR_error = log10(1.0 - PCR_error_rate);
|
||||||
setToZero();
|
setToZero();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -242,45 +256,62 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
|
public int add(ReadBackedPileup pileup, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
|
|
||||||
// todo: first validate that my changes were good by passing in fragments representing just a single read...
|
// TODO-- move this outside the UG, e.g. to ReadBackedPileup
|
||||||
//Set<PerFragmentPileupElement> fragments = new HashSet<PerFragmentPileupElement>();
|
// combine paired reads into a single fragment
|
||||||
|
HashMap<String, PerFragmentPileupElement> fragments = new HashMap<String, PerFragmentPileupElement>();
|
||||||
for ( PileupElement p : pileup ) {
|
for ( PileupElement p : pileup ) {
|
||||||
if ( usableBase(p, ignoreBadBases) ) {
|
Set<PileupElement> fragment = new HashSet<PileupElement>();
|
||||||
|
String readName = p.getRead().getReadName();
|
||||||
|
|
||||||
/****
|
if ( fragments.containsKey(readName) )
|
||||||
Set<PileupElement> fragment = new HashSet<PileupElement>();
|
fragment.addAll(fragments.get(readName).getPileupElements());
|
||||||
fragment.add(p);
|
|
||||||
fragments.add(new PerFragmentPileupElement(fragment));
|
fragment.add(p);
|
||||||
}
|
fragments.put(readName, new PerFragmentPileupElement(fragment));
|
||||||
}
|
}
|
||||||
|
|
||||||
// for each fragment, add to the likelihoods
|
// for each fragment, add to the likelihoods
|
||||||
for ( PerFragmentPileupElement fragment : fragments ) {
|
for ( PerFragmentPileupElement fragment : fragments.values() ) {
|
||||||
n += add(fragment, capBaseQualsAtMappingQual);
|
n += add(fragment, ignoreBadBases, capBaseQualsAtMappingQual);
|
||||||
}
|
|
||||||
****/
|
|
||||||
|
|
||||||
byte qual = capBaseQualsAtMappingQual ? (byte)Math.min((int)p.getQual(), p.getMappingQual()) : p.getQual();
|
|
||||||
n += add(p.getBase(), qual, p.getRead(), p.getOffset());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
// public int add(PerFragmentPileupElement fragment, boolean capBaseQualsAtMappingQual) {
|
public int add(PerFragmentPileupElement fragment, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual) {
|
||||||
|
// TODO-- Right now we assume that there are at most 2 reads per fragment. This assumption is fine
|
||||||
|
// TODO-- given the current state of next-gen sequencing, but may need to be fixed in the future.
|
||||||
|
// TODO-- However, when that happens, we'll need to be a lot smarter about the caching we do here.
|
||||||
|
byte observedBase1 = 0, observedBase2 = 0, qualityScore1 = 0, qualityScore2 = 0;
|
||||||
|
for ( PileupElement p : fragment ) {
|
||||||
|
if ( !usableBase(p, ignoreBadBases) )
|
||||||
|
continue;
|
||||||
|
|
||||||
public int add(byte observedBase, byte qualityScore, SAMRecord read, int offset) {
|
byte qual = p.getQual();
|
||||||
if ( qualityScore == 0 ) { // zero quals are wrong
|
if ( qual > SAMUtils.MAX_PHRED_SCORE )
|
||||||
return 0;
|
throw new UserException.MalformedBam(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
|
||||||
|
if ( capBaseQualsAtMappingQual )
|
||||||
|
qual = (byte)Math.min((int)p.getQual(), p.getMappingQual());
|
||||||
|
|
||||||
|
if ( qualityScore1 == 0 ) {
|
||||||
|
observedBase1 = p.getBase();
|
||||||
|
qualityScore1 = qual;
|
||||||
|
} else {
|
||||||
|
observedBase2 = p.getBase();
|
||||||
|
qualityScore2 = qual;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// abort early if we didn't see any good bases
|
||||||
|
if ( observedBase1 == 0 && observedBase2 == 0 )
|
||||||
|
return 0;
|
||||||
|
|
||||||
// Just look up the cached result if it's available, or compute and store it
|
// Just look up the cached result if it's available, or compute and store it
|
||||||
DiploidSNPGenotypeLikelihoods gl;
|
DiploidSNPGenotypeLikelihoods gl;
|
||||||
if ( ! inCache( observedBase, qualityScore, FIXED_PLOIDY, read) ) {
|
if ( ! inCache(observedBase1, qualityScore1, observedBase2, qualityScore2, FIXED_PLOIDY) ) {
|
||||||
gl = calculateCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read, offset);
|
gl = calculateCachedGenotypeLikelihoods(observedBase1, qualityScore1, observedBase2, qualityScore2, FIXED_PLOIDY);
|
||||||
} else {
|
} else {
|
||||||
gl = getCachedGenotypeLikelihoods(observedBase, qualityScore, FIXED_PLOIDY, read);
|
gl = getCachedGenotypeLikelihoods(observedBase1, qualityScore1, observedBase2, qualityScore2, FIXED_PLOIDY);
|
||||||
}
|
}
|
||||||
|
|
||||||
// for bad bases, there are no likelihoods
|
// for bad bases, there are no likelihoods
|
||||||
|
|
@ -292,11 +323,10 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||||
double likelihood = likelihoods[g.ordinal()];
|
double likelihood = likelihoods[g.ordinal()];
|
||||||
|
|
||||||
if ( VERBOSE ) {
|
//if ( VERBOSE ) {
|
||||||
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
|
// System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f / %f%n",
|
||||||
System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f / %f%n",
|
// observedBase, g, qualityScore, pow(10,likelihood) * 100, likelihood);
|
||||||
observedBase, g, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood);
|
//}
|
||||||
}
|
|
||||||
|
|
||||||
log10Likelihoods[g.ordinal()] += likelihood;
|
log10Likelihoods[g.ordinal()] += likelihood;
|
||||||
log10Posteriors[g.ordinal()] += likelihood;
|
log10Posteriors[g.ordinal()] += likelihood;
|
||||||
|
|
@ -305,52 +335,50 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static DiploidSNPGenotypeLikelihoods[][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY][2];
|
static DiploidSNPGenotypeLikelihoods[][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][BaseUtils.BASES.length+1][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY];
|
||||||
|
|
||||||
protected boolean inCache( byte observedBase, byte qualityScore, int ploidy, SAMRecord read) {
|
protected boolean inCache(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
|
||||||
return getCache(CACHE, observedBase, qualityScore, ploidy, read) != null;
|
return getCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy) != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected DiploidSNPGenotypeLikelihoods getCachedGenotypeLikelihoods( byte observedBase, byte qualityScore, int ploidy, SAMRecord read) {
|
protected DiploidSNPGenotypeLikelihoods getCachedGenotypeLikelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
|
||||||
DiploidSNPGenotypeLikelihoods gl = getCache(CACHE, observedBase, qualityScore, ploidy, read);
|
DiploidSNPGenotypeLikelihoods gl = getCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy);
|
||||||
if ( gl == null )
|
if ( gl == null )
|
||||||
throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base=%c, qual=%d, ploidy=%d, read=%s",
|
throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base1=%c, qual1=%d, base2=%c, qual2=%d, ploidy=%d",
|
||||||
observedBase, qualityScore, ploidy, read));
|
observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy));
|
||||||
return gl;
|
return gl;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected DiploidSNPGenotypeLikelihoods calculateCachedGenotypeLikelihoods(byte observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset) {
|
protected DiploidSNPGenotypeLikelihoods calculateCachedGenotypeLikelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
|
||||||
DiploidSNPGenotypeLikelihoods gl = calculateGenotypeLikelihoods(observedBase, qualityScore, read, offset);
|
DiploidSNPGenotypeLikelihoods gl = calculateGenotypeLikelihoods(observedBase1, qualityScore1, observedBase2, qualityScore2);
|
||||||
setCache(CACHE, observedBase, qualityScore, ploidy, read, gl);
|
setCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy, gl);
|
||||||
return gl;
|
return gl;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void setCache( DiploidSNPGenotypeLikelihoods[][][][] cache,
|
protected void setCache( DiploidSNPGenotypeLikelihoods[][][][][] cache,
|
||||||
byte observedBase, byte qualityScore, int ploidy,
|
byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy,
|
||||||
SAMRecord read, DiploidSNPGenotypeLikelihoods val ) {
|
DiploidSNPGenotypeLikelihoods val ) {
|
||||||
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
|
int i = BaseUtils.simpleBaseToBaseIndex(observedBase1);
|
||||||
int j = qualityScore;
|
int j = qualityScore1;
|
||||||
if ( j > SAMUtils.MAX_PHRED_SCORE )
|
int k = qualityScore2 != 0 ? BaseUtils.simpleBaseToBaseIndex(observedBase2) : BaseUtils.BASES.length;
|
||||||
throw new UserException.MalformedBam(read, String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, j, read.getReadName()));
|
int l = qualityScore2;
|
||||||
int k = ploidy;
|
int m = ploidy;
|
||||||
int x = strandIndex(! read.getReadNegativeStrandFlag() );
|
|
||||||
|
|
||||||
cache[i][j][k][x] = val;
|
cache[i][j][k][l][m] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected DiploidSNPGenotypeLikelihoods getCache( DiploidSNPGenotypeLikelihoods[][][][] cache,
|
protected DiploidSNPGenotypeLikelihoods getCache(DiploidSNPGenotypeLikelihoods[][][][][] cache,
|
||||||
byte observedBase, byte qualityScore, int ploidy, SAMRecord read) {
|
byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
|
||||||
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
|
int i = BaseUtils.simpleBaseToBaseIndex(observedBase1);
|
||||||
int j = qualityScore;
|
int j = qualityScore1;
|
||||||
if ( j > SAMUtils.MAX_PHRED_SCORE )
|
int k = qualityScore2 != 0 ? BaseUtils.simpleBaseToBaseIndex(observedBase2) : BaseUtils.BASES.length;
|
||||||
throw new UserException.MalformedBam(read, String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, j, read.getReadName()));
|
int l = qualityScore2;
|
||||||
int k = ploidy;
|
int m = ploidy;
|
||||||
int x = strandIndex(! read.getReadNegativeStrandFlag() );
|
return cache[i][j][k][l][m];
|
||||||
return cache[i][j][k][x];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected DiploidSNPGenotypeLikelihoods calculateGenotypeLikelihoods(byte observedBase, byte qualityScore, SAMRecord read, int offset) {
|
protected DiploidSNPGenotypeLikelihoods calculateGenotypeLikelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2) {
|
||||||
double[] log10FourBaseLikelihoods = computeLog10Likelihoods(observedBase, qualityScore, read);
|
double[] log10FourBaseLikelihoods = computeLog10Likelihoods(observedBase1, qualityScore1, observedBase2, qualityScore2);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
|
|
@ -388,24 +416,36 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
* Updates likelihoods and posteriors to reflect an additional observation of observedBase with
|
* Updates likelihoods and posteriors to reflect an additional observation of observedBase with
|
||||||
* qualityScore.
|
* qualityScore.
|
||||||
*
|
*
|
||||||
* @param observedBase observed base
|
* @param observedBase1 the base observed on the 1st read of the fragment
|
||||||
* @param qualityScore base quality
|
* @param qualityScore1 the qual of the base on the 1st read of the fragment, or zero if NA
|
||||||
* @param read SAM read
|
* @param observedBase2 the base observed on the 2nd read of the fragment
|
||||||
|
* @param qualityScore2 the qual of the base on the 2nd read of the fragment, or zero if NA
|
||||||
* @return likelihoods for this observation or null if the base was not considered good enough to add to the likelihoods (Q0 or 'N', for example)
|
* @return likelihoods for this observation or null if the base was not considered good enough to add to the likelihoods (Q0 or 'N', for example)
|
||||||
*/
|
*/
|
||||||
protected double[] computeLog10Likelihoods(byte observedBase, byte qualityScore, SAMRecord read) {
|
protected double[] computeLog10Likelihoods(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2) {
|
||||||
double[] log10FourBaseLikelihoods = baseZeros.clone();
|
double[] log10FourBaseLikelihoods = baseZeros.clone();
|
||||||
|
|
||||||
for ( byte base : BaseUtils.BASES ) {
|
for ( byte trueBase : BaseUtils.BASES ) {
|
||||||
double likelihood = log10PofObservingBaseGivenChromosome(observedBase, base, qualityScore);
|
double likelihood = 0.0;
|
||||||
|
|
||||||
if ( VERBOSE ) {
|
for ( byte fragmentBase : BaseUtils.BASES ) {
|
||||||
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
|
double log10FragmentLikelihood = (trueBase == fragmentBase ? log10_1_minus_PCR_error : log10_PCR_error_3);
|
||||||
System.out.printf(" L(%c | b=%s, Q=%d, S=%s) = %f / %f%n",
|
if ( qualityScore1 != 0 ) {
|
||||||
observedBase, base, qualityScore, fwdStrand ? "+" : "-", pow(10,likelihood) * 100, likelihood);
|
log10FragmentLikelihood += log10PofObservingBaseGivenChromosome(observedBase1, fragmentBase, qualityScore1);
|
||||||
|
}
|
||||||
|
if ( qualityScore2 != 0 ) {
|
||||||
|
log10FragmentLikelihood += log10PofObservingBaseGivenChromosome(observedBase2, fragmentBase, qualityScore2);
|
||||||
|
}
|
||||||
|
|
||||||
|
//if ( VERBOSE ) {
|
||||||
|
// System.out.printf(" L(%c | b=%s, Q=%d) = %f / %f%n",
|
||||||
|
// observedBase, trueBase, qualityScore, pow(10,likelihood) * 100, likelihood);
|
||||||
|
//}
|
||||||
|
|
||||||
|
likelihood += pow(10, log10FragmentLikelihood);
|
||||||
}
|
}
|
||||||
|
|
||||||
log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(base)] = likelihood;
|
log10FourBaseLikelihoods[BaseUtils.simpleBaseToBaseIndex(trueBase)] = log10(likelihood);
|
||||||
}
|
}
|
||||||
|
|
||||||
return log10FourBaseLikelihoods;
|
return log10FourBaseLikelihoods;
|
||||||
|
|
@ -444,19 +484,16 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
//
|
//
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
public static int strandIndex(boolean fwdStrand) {
|
|
||||||
return fwdStrand ? 0 : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true when the observedBase is considered usable.
|
* Returns true when the observedBase is considered usable.
|
||||||
* @param p pileup element
|
* @param p pileup element
|
||||||
* @param ignoreBadBases should we ignore bad bases?
|
* @param ignoreBadBases should we ignore bad bases?
|
||||||
* @return true if the base is a usable base
|
* @return true if the base is a usable base
|
||||||
*/
|
*/
|
||||||
protected static boolean usableBase(PileupElement p, boolean ignoreBadBases) {
|
protected static boolean usableBase(PileupElement p, boolean ignoreBadBases) {
|
||||||
// ignore deletions and filtered bases
|
// ignore deletions, Q0 bases, and filtered bases
|
||||||
if ( p.isDeletion() ||
|
if ( p.isDeletion() ||
|
||||||
|
p.getQual() == 0 ||
|
||||||
(p.getRead() instanceof GATKSAMRecord &&
|
(p.getRead() instanceof GATKSAMRecord &&
|
||||||
!((GATKSAMRecord)p.getRead()).isGoodBase(p.getOffset())) )
|
!((GATKSAMRecord)p.getRead()).isGoodBase(p.getOffset())) )
|
||||||
return false;
|
return false;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,55 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: ebanks
|
||||||
|
* Date: Jan 10, 2011
|
||||||
|
*
|
||||||
|
* Useful helper class to represent a full read pair at a position
|
||||||
|
*/
|
||||||
|
public class PerFragmentPileupElement implements Iterable<PileupElement> {
|
||||||
|
protected Set<PileupElement> PEs = null;
|
||||||
|
|
||||||
|
public PerFragmentPileupElement(Set<PileupElement> PEs) {
|
||||||
|
this.PEs = new HashSet<PileupElement>(PEs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<PileupElement> getPileupElements() {
|
||||||
|
return PEs;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator<PileupElement> iterator() {
|
||||||
|
return PEs.iterator();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -87,7 +87,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
ReadBackedPileup pileup = sample.getValue().getContext(contextType).getBasePileup();
|
ReadBackedPileup pileup = sample.getValue().getContext(contextType).getBasePileup();
|
||||||
|
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors);
|
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
|
||||||
int nGoodBases = GL.add(pileup, true, true);
|
int nGoodBases = GL.add(pileup, true, true);
|
||||||
if ( nGoodBases == 0 )
|
if ( nGoodBases == 0 )
|
||||||
continue;
|
continue;
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,9 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
||||||
public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY;
|
public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY;
|
||||||
|
|
||||||
|
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
|
||||||
|
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
|
||||||
|
|
||||||
// control the output
|
// control the output
|
||||||
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
|
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
|
||||||
public boolean GENOTYPE_MODE = false;
|
public boolean GENOTYPE_MODE = false;
|
||||||
|
|
@ -86,6 +89,8 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
|
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
|
||||||
public Double MAX_DELETION_FRACTION = 0.05;
|
public Double MAX_DELETION_FRACTION = 0.05;
|
||||||
|
|
||||||
|
|
||||||
|
// indel-related arguments
|
||||||
@Argument(fullName = "get_indel_alleles_from_vcf", shortName = "getIndelAllelesFromVCF", doc = "Get reference/alt alleles for indel genotyping from VCF", required = false)
|
@Argument(fullName = "get_indel_alleles_from_vcf", shortName = "getIndelAllelesFromVCF", doc = "Get reference/alt alleles for indel genotyping from VCF", required = false)
|
||||||
public boolean GET_ALLELES_FROM_VCF = false;
|
public boolean GET_ALLELES_FROM_VCF = false;
|
||||||
|
|
||||||
|
|
@ -100,6 +105,7 @@ public class UnifiedArgumentCollection {
|
||||||
|
|
||||||
uac.GLmodel = GLmodel;
|
uac.GLmodel = GLmodel;
|
||||||
uac.heterozygosity = heterozygosity;
|
uac.heterozygosity = heterozygosity;
|
||||||
|
uac.PCR_error = PCR_error;
|
||||||
uac.GENOTYPE_MODE = GENOTYPE_MODE;
|
uac.GENOTYPE_MODE = GENOTYPE_MODE;
|
||||||
uac.ALL_BASES_MODE = ALL_BASES_MODE;
|
uac.ALL_BASES_MODE = ALL_BASES_MODE;
|
||||||
uac.NO_SLOD = NO_SLOD;
|
uac.NO_SLOD = NO_SLOD;
|
||||||
|
|
|
||||||
|
|
@ -36,8 +36,10 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
|
@ -104,9 +106,7 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
||||||
List<SAMRecord> reads = context.getReads();
|
if(context.size() > 0 ) {
|
||||||
if(reads.size() > 0 ) {
|
|
||||||
List<Integer> offsets = context.getOffsets();
|
|
||||||
|
|
||||||
int numAs = 0, numCs = 0, numGs = 0, numTs = 0;
|
int numAs = 0, numCs = 0, numGs = 0, numTs = 0;
|
||||||
//if (DEBUG){
|
//if (DEBUG){
|
||||||
|
|
@ -115,19 +115,16 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
|
||||||
|
|
||||||
//Calculate posterior probabilities
|
//Calculate posterior probabilities
|
||||||
DiploidSNPGenotypeLikelihoods G = new DiploidSNPGenotypeLikelihoods();
|
DiploidSNPGenotypeLikelihoods G = new DiploidSNPGenotypeLikelihoods();
|
||||||
SAMRecord read; int offset; char base; byte qual; int mapquality; String readname;
|
|
||||||
|
|
||||||
//Check for bad bases and ensure mapping quality
|
|
||||||
for (int i = 0; i < reads.size(); i++) {
|
|
||||||
read = reads.get(i);
|
|
||||||
offset = offsets.get(i);
|
|
||||||
base = (char)read.getReadBases()[offset];
|
|
||||||
qual = read.getBaseQualities()[offset];
|
|
||||||
//mapquality = read.getMappingQuality();
|
|
||||||
if (!ReadsToDiscard.contains(read.getReadName()) && BaseUtils.simpleBaseToBaseIndex(base) != -1) {
|
|
||||||
|
|
||||||
//consider base in likelihood calculations if it looks good and has high mapping score
|
for ( PileupElement p : context.getBasePileup() ) {
|
||||||
G.add((byte)base, qual, read, offset);
|
byte base = p.getBase();
|
||||||
|
if (!ReadsToDiscard.contains(p.getRead().getReadName()) && BaseUtils.simpleBaseToBaseIndex(base) != -1) {
|
||||||
|
|
||||||
|
// TODO-- move this outside, e.g. to ReadBackedPileup
|
||||||
|
HashSet<PileupElement> fragment = new HashSet<PileupElement>();
|
||||||
|
fragment.add(p);
|
||||||
|
G.add(new PerFragmentPileupElement(fragment), true, false);
|
||||||
//if (DEBUG){
|
//if (DEBUG){
|
||||||
if (base == 'A'){numAs++;}
|
if (base == 'A'){numAs++;}
|
||||||
else if (base == 'C'){numCs++;}
|
else if (base == 'C'){numCs++;}
|
||||||
|
|
@ -146,7 +143,7 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
|
||||||
//}
|
//}
|
||||||
|
|
||||||
}
|
}
|
||||||
return context.getReads().size();
|
return context.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Pair<Long, Long> reduce(Integer value, Pair<Long, Long> sum) {
|
public Pair<Long, Long> reduce(Integer value, Pair<Long, Long> sum) {
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
|
@ -41,6 +42,7 @@ import org.broadinstitute.sting.commandline.Output;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Hashtable;
|
import java.util.Hashtable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -340,7 +342,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
||||||
ReadBackedPileup pileup = context.getPileup();
|
ReadBackedPileup pileup = context.getPileup();
|
||||||
|
|
||||||
long loc = context.getPosition();
|
long loc = context.getPosition();
|
||||||
if( context.getReads().size() > 0 ) {
|
if( context.size() > 0 ) {
|
||||||
//out.printf("RG for first read: %s%n",context.getReads().get(0).getReadName());
|
//out.printf("RG for first read: %s%n",context.getReads().get(0).getReadName());
|
||||||
int numAs = 0, numCs = 0, numGs = 0, numTs = 0,depth = 0;
|
int numAs = 0, numCs = 0, numGs = 0, numTs = 0,depth = 0;
|
||||||
String c1 = "", c2 = "";
|
String c1 = "", c2 = "";
|
||||||
|
|
@ -354,25 +356,23 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
||||||
|
|
||||||
//Calculate posterior probabilities
|
//Calculate posterior probabilities
|
||||||
DiploidSNPGenotypeLikelihoods G = new DiploidSNPGenotypeLikelihoods();
|
DiploidSNPGenotypeLikelihoods G = new DiploidSNPGenotypeLikelihoods();
|
||||||
SAMRecord read; int offset; char base; byte qual; int mapquality; String readname;
|
|
||||||
|
|
||||||
//Check for bad bases and ensure mapping quality myself. This works.
|
for ( PileupElement p : context.getBasePileup() ) {
|
||||||
for (int i = 0; i < reads.size(); i++) {
|
byte base = p.getBase();
|
||||||
read = reads.get(i);
|
int mapquality = p.getMappingQual();
|
||||||
offset = offsets.get(i);
|
|
||||||
base = (char)read.getReadBases()[offset];
|
|
||||||
qual = read.getBaseQualities()[offset];
|
|
||||||
mapquality = read.getMappingQuality();
|
|
||||||
if (mapquality >= 5 && BaseUtils.simpleBaseToBaseIndex(base) != -1) {
|
if (mapquality >= 5 && BaseUtils.simpleBaseToBaseIndex(base) != -1) {
|
||||||
if (ReadsToFilter.contains(read.getReadName())){
|
String readname = p.getRead().getReadName();
|
||||||
|
if (ReadsToFilter.contains(readname)){
|
||||||
if (DEBUG){
|
if (DEBUG){
|
||||||
out.printf("\n%s %s %s %s\n",read.getReadName(),read.getAlignmentStart(),read.getAlignmentEnd(),base);
|
out.printf("\n%s %s %s %s\n",readname,p.getRead().getAlignmentStart(),p.getRead().getAlignmentEnd(),base);
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
//consider base in likelihood calculations if it looks good and has high mapping score
|
//consider base in likelihood calculations if it looks good and has high mapping score
|
||||||
G.add((byte)base, qual, read, offset);
|
// TODO-- move this outside, e.g. to ReadBackedPileup
|
||||||
readname = read.getReadName();
|
HashSet<PileupElement> fragment = new HashSet<PileupElement>();
|
||||||
if (!AllReadNames.contains(readname)){AllReadNames.add(readname); AllReads.add(read);}
|
fragment.add(p);
|
||||||
|
G.add(new PerFragmentPileupElement(fragment), true, false);
|
||||||
|
if (!AllReadNames.contains(readname)){AllReadNames.add(readname); AllReads.add(p.getRead());}
|
||||||
if (base == 'A'){numAs++; depth++;}
|
if (base == 'A'){numAs++; depth++;}
|
||||||
else if (base == 'C'){numCs++; depth++;}
|
else if (base == 'C'){numCs++; depth++;}
|
||||||
else if (base == 'T'){numTs++; depth++;}
|
else if (base == 'T'){numTs++; depth++;}
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ public class
|
||||||
public void testMultiSamplePilot1() {
|
public void testMultiSamplePilot1() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||||
Arrays.asList("6c04cc01cf6bebe4bdbc025fd45c5559"));
|
Arrays.asList("e7514b0f1f2df1ca42815b5c45775f36"));
|
||||||
executeTest("testMultiSamplePilot1", spec);
|
executeTest("testMultiSamplePilot1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -33,7 +33,7 @@ public class
|
||||||
public void testMultiSamplePilot2() {
|
public void testMultiSamplePilot2() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
|
baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
|
||||||
Arrays.asList("84efe068164891dbec7c85ff6cc33df3"));
|
Arrays.asList("49ae129435063f47f0faf00337eb8bf7"));
|
||||||
executeTest("testMultiSamplePilot2", spec);
|
executeTest("testMultiSamplePilot2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -41,7 +41,7 @@ public class
|
||||||
public void testSingleSamplePilot2() {
|
public void testSingleSamplePilot2() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("c7decebadb35067a38b9be3c43c1fb76"));
|
Arrays.asList("8da08fe12bc0d95e548fe63681997038"));
|
||||||
executeTest("testSingleSamplePilot2", spec);
|
executeTest("testSingleSamplePilot2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -51,7 +51,7 @@ public class
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
private final static String COMPRESSED_OUTPUT_MD5 = "66bcb3f7b20d4fbe7849b616bfe69c0f";
|
private final static String COMPRESSED_OUTPUT_MD5 = "950ebf5eaa245131dc22aef211526681";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCompressedOutput() {
|
public void testCompressedOutput() {
|
||||||
|
|
@ -78,7 +78,7 @@ public class
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParallelization() {
|
public void testParallelization() {
|
||||||
String md5 = "a0032a9952c94a55e0fa42e2dec33169";
|
String md5 = "73845a29994d653a4d3f01b5467a3eed";
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||||
|
|
@ -105,12 +105,12 @@ public class
|
||||||
@Test
|
@Test
|
||||||
public void testParameter() {
|
public void testParameter() {
|
||||||
HashMap<String, String> e = new HashMap<String, String>();
|
HashMap<String, String> e = new HashMap<String, String>();
|
||||||
e.put( "-genotype", "4a623d4a68fba4f9f8d7e915af0cc450" );
|
e.put( "-genotype", "683ce57f2fd3acd5f6fe7599c1ace169" );
|
||||||
e.put( "-all_bases", "2ab5106795c70a63d8dd2353ee0f1427" );
|
e.put( "-all_bases", "2ddf763c208602693cad942c9ccb804c" );
|
||||||
e.put( "--min_base_quality_score 26", "9c9be22923b9ba0d588f3d37385ae4b0" );
|
e.put( "--min_base_quality_score 26", "5f1cfb9c7f82e6414d5db7aa344813ac" );
|
||||||
e.put( "--min_mapping_quality_score 26", "aa36bb2f4fc5bd6b6c4206e0a084a577" );
|
e.put( "--min_mapping_quality_score 26", "6c3ad441f3a23ade292549b1dea80932" );
|
||||||
e.put( "--max_mismatches_in_40bp_window 5", "4914ed25a8ca22c7aea983999cd6768a" );
|
e.put( "--max_mismatches_in_40bp_window 5", "5ecaf4281410b67e8e2e164f2ea0d58a" );
|
||||||
e.put( "--p_nonref_model GRID_SEARCH", "dd4fb1fb304e44b82c9cc3d4cc257172" );
|
e.put( "--p_nonref_model GRID_SEARCH", "17ffb56d078fdde335a79773e9534ce7" );
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
|
@ -124,12 +124,12 @@ public class
|
||||||
public void testConfidence() {
|
public void testConfidence() {
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||||
Arrays.asList("dd4fb1fb304e44b82c9cc3d4cc257172"));
|
Arrays.asList("17ffb56d078fdde335a79773e9534ce7"));
|
||||||
executeTest("testConfidence1", spec1);
|
executeTest("testConfidence1", spec1);
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||||
Arrays.asList("eef4e314d0cdae669454232ffbbab8ea"));
|
Arrays.asList("d49ec8c1476cecb8e3153894cc0f6662"));
|
||||||
executeTest("testConfidence2", spec2);
|
executeTest("testConfidence2", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -141,8 +141,8 @@ public class
|
||||||
@Test
|
@Test
|
||||||
public void testHeterozyosity() {
|
public void testHeterozyosity() {
|
||||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||||
e.put( 0.01, "1bbd2e24ddec902339eac481565d7f0a" );
|
e.put( 0.01, "7e820c33a4c688148eec57342645c9b6" );
|
||||||
e.put( 1.0 / 1850, "472d2d6c375a7c6d2edb464a12f10742" );
|
e.put( 1.0 / 1850, "c1c48e0c4724b75f12936e22a8629457" );
|
||||||
|
|
||||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
|
@ -165,7 +165,7 @@ public class
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -L 1:10,000,000-10,100,000",
|
" -L 1:10,000,000-10,100,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("eb4144006eda780d69b6979817ceb58d"));
|
Arrays.asList("5974d8c21d27d014e2d0bed695b0b42e"));
|
||||||
|
|
||||||
executeTest(String.format("testMultiTechnologies"), spec);
|
executeTest(String.format("testMultiTechnologies"), spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,7 @@ public class HLACallerIntegrationTest extends WalkerTest {
|
||||||
public void testCalculateBaseLikelihoods() {
|
public void testCalculateBaseLikelihoods() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T CalculateBaseLikelihoods -I " + validationDataLocation + "NA12878.HISEQ.HLA.bam -R " + b36KGReference + " -L " + intervals + " -filter " + validationDataLocation + "HLA_HISEQ.filter -maxAllowedMismatches 6 -minRequiredMatches 0 -o %s", 1,
|
"-T CalculateBaseLikelihoods -I " + validationDataLocation + "NA12878.HISEQ.HLA.bam -R " + b36KGReference + " -L " + intervals + " -filter " + validationDataLocation + "HLA_HISEQ.filter -maxAllowedMismatches 6 -minRequiredMatches 0 -o %s", 1,
|
||||||
Arrays.asList("98e64882f93bf7550457bee4182caab6"));
|
Arrays.asList("921bb354f3877e5183ca31815546b9fd"));
|
||||||
executeTest("test CalculateBaseLikelihoods", spec);
|
executeTest("test CalculateBaseLikelihoods", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue