diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index d555463bc..6638fc7a8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -38,7 +38,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati for ( final Genotype genotype : genotypes ) { // we care only about variant calls with likelihoods - if ( genotype.isHomRef() ) + if ( !genotype.isHet() && !genotype.isHomVar() ) continue; AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index a8ce98945..7d3e7047d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -52,7 +52,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { protected enum GenotypeType { AA, AB, BB } - protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE; + protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY; protected AlleleFrequencyCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { this.N = N; @@ -62,24 +62,14 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { /** * Must be overridden by concrete subclasses - * @param GLs genotype likelihoods - * @param Alleles Alleles corresponding to GLs - * @param log10AlleleFrequencyPriors priors - * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results + * @param GLs genotype likelihoods + * @param Alleles Alleles corresponding to GLs + * @param log10AlleleFrequencyPriors priors + * @param log10AlleleFrequencyLikelihoods array (pre-allocated) to store likelihoods results + * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store posteriors results */ protected abstract void getLog10PNonRef(GenotypesContext GLs, List Alleles, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors); - - /** - * Can be overridden by concrete subclasses - * @param vc variant context with genotype likelihoods - * @param log10AlleleFrequencyPosteriors allele frequency results - * @param AFofMaxLikelihood allele frequency of max likelihood - * - * @return calls - */ - protected abstract GenotypesContext assignGenotypes(VariantContext vc, - double[] log10AlleleFrequencyPosteriors, - int AFofMaxLikelihood); + double[][] log10AlleleFrequencyPriors, + double[][] log10AlleleFrequencyLikelihoods, + double[][] log10AlleleFrequencyPosteriors); } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 5d0b6f0a7..77a940dcf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -27,69 +27,32 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.*; import java.io.PrintStream; import java.util.*; public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { - // - // code for testing purposes - // + private final static boolean DEBUG = false; + private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 - private final boolean SIMPLE_GREEDY_GENOTYPER = false; - private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. - private final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { super(UAC, N, logger, verboseWriter); } - public void getLog10PNonRef(GenotypesContext GLs, List alleles, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors) { + public void getLog10PNonRef(final GenotypesContext GLs, + final List alleles, + final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, + final double[][] log10AlleleFrequencyPosteriors) { final int numAlleles = alleles.size(); - final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null; - final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null; - int idxDiag = numAlleles; - int incr = numAlleles - 1; - for (int k=1; k < numAlleles; k++) { - // multi-allelic approximation, part 1: Ideally - // for each alt allele compute marginal (suboptimal) posteriors - - // compute indices for AA,AB,BB for current allele - genotype likelihoods are a linear vector that can be thought of - // as a row-wise upper triangular matrix of likelihoods. - // So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC. - // 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD - - final int idxAA = 0; - final int idxAB = k; - // yy is always element on the diagonal. - // 2 alleles: BBelement 2 - // 3 alleles: BB element 3. CC element 5 - // 4 alleles: - final int idxBB = idxDiag; - idxDiag += incr--; - - final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); - - if (numAlleles > 2) { - posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone(); - bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors); - } - } - - if (numAlleles > 2) { - // multiallelic approximation, part 2: - // report posteriors for allele that has highest estimated AC - int mostLikelyAlleleIdx = MathUtils.maxElementIndex(bestAFguess); - for (int k=0; k < log10AlleleFrequencyPosteriors.length-1; k++) - log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]); - - } + //linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); + linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false); } private static final ArrayList getGLs(GenotypesContext GLs) { @@ -100,7 +63,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { if ( sample.hasLikelihoods() ) { double[] gls = sample.getLikelihoods().getAsVector(); - if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL) + if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL ) genotypeLikelihoods.add(gls); } } @@ -109,9 +72,395 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } + final static double approximateLog10SumLog10(double[] vals) { + if ( vals.length < 2 ) + throw new ReviewedStingException("Passing array with fewer than 2 values when computing approximateLog10SumLog10"); + + double approx = approximateLog10SumLog10(vals[0], vals[1]); + for ( int i = 2; i < vals.length; i++ ) + approx = approximateLog10SumLog10(approx, vals[i]); + return approx; + } + + final static double approximateLog10SumLog10(double small, double big) { + // make sure small is really the smaller value + if ( small > big ) { + final double t = big; + big = small; + small = t; + } + + if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY ) + return big; + + if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE) + return big; + + // OK, so |y-x| < tol: we use the following identity then: + // we need to compute log10(10^x + 10^y) + // By Jacobian logarithm identity, this is equal to + // max(x,y) + log10(1+10^-abs(x-y)) + // we compute the second term as a table lookup + // with integer quantization + // we have pre-stored correction for 0,0.1,0.2,... 10.0 + //final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding + int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding + + //double z =Math.log10(1+Math.pow(10.0,-diff)); + //System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind); + return big + MathUtils.jacobianLogTable[ind]; + } + + // ------------------------------------------------------------------------------------- // - // Linearized, ~O(N), implementation. + // Multi-allelic implementation. + // + // ------------------------------------------------------------------------------------- + + private static final int HOM_REF_INDEX = 0; // AA likelihoods are always first + + // a wrapper around the int array so that we can make it hashable + private static final class ExactACcounts { + + private final int[] counts; + private int hashcode = -1; + + public ExactACcounts(final int[] counts) { + this.counts = counts; + } + + public int[] getCounts() { + return counts; + } + + @Override + public boolean equals(Object obj) { + return (obj instanceof ExactACcounts) ? Arrays.equals(counts, ((ExactACcounts)obj).counts) : false; + } + + @Override + public int hashCode() { + if ( hashcode == -1 ) + hashcode = Arrays.hashCode(counts); + return hashcode; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append(counts[0]); + for ( int i = 1; i < counts.length; i++ ) { + sb.append("/"); + sb.append(counts[i]); + } + return sb.toString(); + } + } + + // This class represents a column in the Exact AC calculation matrix + private static final class ExactACset { + + // the counts of the various alternate alleles which this column represents + final ExactACcounts ACcounts; + + // the column of the matrix + final double[] log10Likelihoods; + + // mapping of column index for those columns upon which this one depends to the index into the PLs which is used as the transition to this column; + // for example, in the biallelic case, the transition from k=0 to k=1 would be AB while the transition to k=2 would be BB. + final HashMap ACsetIndexToPLIndex = new HashMap(); + + // to minimize memory consumption, we know we can delete any sets in this list because no further sets will depend on them + final ArrayList dependentACsetsToDelete = new ArrayList(); + + + public ExactACset(final int size, final ExactACcounts ACcounts) { + this.ACcounts = ACcounts; + log10Likelihoods = new double[size]; + } + + // sum of all the non-reference alleles + public int getACsum() { + int sum = 0; + for ( int count : ACcounts.getCounts() ) + sum += count; + return sum; + } + + public boolean equals(Object obj) { + return (obj instanceof ExactACset) ? ACcounts.equals(((ExactACset)obj).ACcounts) : false; + } + } + + public static void linearExactMultiAllelic(final GenotypesContext GLs, + final int numAlternateAlleles, + final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, + final double[][] log10AlleleFrequencyPosteriors, + final boolean preserveData) { + + final ArrayList genotypeLikelihoods = getGLs(GLs); + final int numSamples = genotypeLikelihoods.size()-1; + final int numChr = 2*numSamples; + + // queue of AC conformations to process + final Queue ACqueue = new LinkedList(); + + // mapping of ExactACset indexes to the objects + final HashMap indexesToACset = new HashMap(numChr+1); + + // add AC=0 to the queue + int[] zeroCounts = new int[numAlternateAlleles]; + ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts)); + ACqueue.add(zeroSet); + indexesToACset.put(zeroSet.ACcounts, zeroSet); + + // keep processing while we have AC conformations that need to be calculated + double maxLog10L = Double.NEGATIVE_INFINITY; + while ( !ACqueue.isEmpty() ) { + // compute log10Likelihoods + final ExactACset set = ACqueue.remove(); + final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); + + // adjust max likelihood seen if needed + maxLog10L = Math.max(maxLog10L, log10LofKs); + } + } + + private static double calculateAlleleCountConformation(final ExactACset set, + final ArrayList genotypeLikelihoods, + final double maxLog10L, + final int numChr, + final boolean preserveData, + final Queue ACqueue, + final HashMap indexesToACset, + final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, + final double[][] log10AlleleFrequencyPosteriors) { + + if ( DEBUG ) + System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); + + // compute the log10Likelihoods + computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); + + // clean up memory + if ( !preserveData ) { + for ( ExactACcounts index : set.dependentACsetsToDelete ) { + indexesToACset.put(index, null); + if ( DEBUG ) + System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts); + } + } + + final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; + + // can we abort early because the log10Likelihoods are so small? + if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { + if ( DEBUG ) + System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); + + // no reason to keep this data around because nothing depends on it + if ( !preserveData ) + indexesToACset.put(set.ACcounts, null); + + return log10LofK; + } + + // iterate over higher frequencies if possible + final int ACwiggle = numChr - set.getACsum(); + if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies + return log10LofK; + + ExactACset lastSet = null; // keep track of the last set placed in the queue so that we can tell it to clean us up when done processing + final int numAltAlleles = set.ACcounts.getCounts().length; + + // genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods. + // so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD. + + // add conformations for the k+1 case + int PLindex = 0; + for ( int allele = 0; allele < numAltAlleles; allele++ ) { + final int[] ACcountsClone = set.ACcounts.getCounts().clone(); + ACcountsClone[allele]++; + lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset); + } + + // add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different + if ( ACwiggle > 1 ) { + for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) { + for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) { + final int[] ACcountsClone = set.ACcounts.getCounts().clone(); + ACcountsClone[allele_i]++; + ACcountsClone[allele_j]++; + lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex , ACqueue, indexesToACset); + } + } + } + + // if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate + // over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early) + if ( !preserveData && lastSet == null ) { + if ( DEBUG ) + System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts); + lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue); + } + if ( lastSet != null ) + lastSet.dependentACsetsToDelete.add(set.ACcounts); + + return log10LofK; + } + + // adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and + // also adds it as a dependency to the given callingSetIndex. + // returns the ExactACset if that set was not already in the queue and null otherwise. + private static ExactACset updateACset(final int[] ACcounts, + final int numChr, + final ExactACset callingSet, + final int PLsetIndex, + final Queue ACqueue, + final HashMap indexesToACset) { + final ExactACcounts index = new ExactACcounts(ACcounts); + boolean wasInQueue = true; + if ( !indexesToACset.containsKey(index) ) { + ExactACset set = new ExactACset(numChr/2 +1, index); + indexesToACset.put(index, set); + ACqueue.add(set); + wasInQueue = false; + } + + // add the given dependency to the set + final ExactACset set = indexesToACset.get(index); + set.ACsetIndexToPLIndex.put(callingSet.ACcounts, PLsetIndex); + return wasInQueue ? null : set; + } + + private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final Queue ACqueue) { + ExactACset set = null; + for ( ExactACset queued : ACqueue ) { + if ( queued.dependentACsetsToDelete.contains(callingSetIndex) ) + set = queued; + } + return set; + } + + private static void computeLofK(final ExactACset set, + final ArrayList genotypeLikelihoods, + final HashMap indexesToACset, + final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, + final double[][] log10AlleleFrequencyPosteriors) { + + set.log10Likelihoods[0] = 0.0; // the zero case + final int totalK = set.getACsum(); + + // special case for k = 0 over all k + if ( totalK == 0 ) { + for ( int j = 1; j < set.log10Likelihoods.length; j++ ) + set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX]; + } + // k > 0 for at least one k + else { + // all possible likelihoods for a given cell from which to choose the max + final int numPaths = set.ACsetIndexToPLIndex.size() + 1; + final double[] log10ConformationLikelihoods = new double[numPaths]; + + for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { + final double[] gl = genotypeLikelihoods.get(j); + final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; + + // initialize + for ( int i = 0; i < numPaths; i++ ) + log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY; + + // deal with the AA case first + if ( totalK < 2*j-1 ) + log10ConformationLikelihoods[0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; + + // deal with the other possible conformations now + if ( totalK <= 2*j ) { // skip impossible conformations + int conformationIndex = 1; + for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) { + if ( DEBUG ) + System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey()); + log10ConformationLikelihoods[conformationIndex++] = + determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()]; + } + } + + final double log10Max = approximateLog10SumLog10(log10ConformationLikelihoods); + + // finally, update the L(j,k) value + set.log10Likelihoods[j] = log10Max - logDenominator; + } + } + + final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; + + // determine the power of theta to use + int nonRefAlleles = 0; + for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { + if ( set.ACcounts.getCounts()[i] > 0 ) + nonRefAlleles++; + } + + // update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs + for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { + int AC = set.ACcounts.getCounts()[i]; + log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyLikelihoods[i][AC], log10LofK); + + // for k=0 we still want to use theta + final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC]; + log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior); + } + } + + private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) { + + // the closed form representation generalized for multiple alleles is as follows: + // AA: (2j - totalK) * (2j - totalK - 1) + // AB: 2k_b * (2j - totalK) + // AC: 2k_c * (2j - totalK) + // BB: k_b * (k_b - 1) + // BC: 2 * k_b * k_c + // CC: k_c * (k_c - 1) + + final int numAltAlleles = ACcounts.length; + + // the AX het case + if ( PLindex <= numAltAlleles ) + return MathUtils.log10Cache[2*ACcounts[PLindex-1]] + MathUtils.log10Cache[2*j-totalK]; + + int subtractor = numAltAlleles+1; + int subtractions = 0; + do { + PLindex -= subtractor; + subtractor--; + subtractions++; + } + while ( PLindex >= subtractor ); + + final int k_i = ACcounts[subtractions-1]; + + // the hom var case (e.g. BB, CC, DD) + final double coeff; + if ( PLindex == 0 ) { + coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1]; + } + // the het non-ref case (e.g. BC, BD, CD) + else { + final int k_j = ACcounts[subtractions+PLindex-1]; + coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j]; + } + + return coeff; + } + + + // ------------------------------------------------------------------------------------- + // + // Deprecated bi-allelic ~O(N) implementation. Kept here for posterity. // // ------------------------------------------------------------------------------------- @@ -119,6 +468,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { * A simple data structure that holds the current, prev, and prev->prev likelihoods vectors * for the exact model calculation */ +/* private final static class ExactACCache { double[] kMinus2, kMinus1, kMinus0; @@ -154,7 +504,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public int linearExact(GenotypesContext GLs, double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { + double[][] log10AlleleFrequencyLikelihoods, + double[][] log10AlleleFrequencyPosteriors) { final ArrayList genotypeLikelihoods = getGLs(GLs); final int numSamples = genotypeLikelihoods.size()-1; final int numChr = 2*numSamples; @@ -171,7 +522,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { if ( k == 0 ) { // special case for k = 0 for ( int j=1; j <= numSamples; j++ ) { - kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[idxAA]; + kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0]; } } else { // k > 0 final double[] kMinus1 = logY.getkMinus1(); @@ -184,14 +535,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { double aa = Double.NEGATIVE_INFINITY; double ab = Double.NEGATIVE_INFINITY; if (k < 2*j-1) - aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[idxAA]; + aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0]; if (k < 2*j) - ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[idxAB]; + ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1]; double log10Max; if (k > 1) { - final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[idxBB]; + final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2]; log10Max = approximateLog10SumLog10(aa, ab, bb); } else { // we know we aren't considering the BB case, so we can use an optimized log10 function @@ -205,7 +556,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // update the posteriors vector final double log10LofK = kMinus0[numSamples]; - log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k]; + log10AlleleFrequencyLikelihoods[0][k] = log10LofK; + log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k]; // can we abort early? lastK = k; @@ -222,202 +574,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } final static double approximateLog10SumLog10(double a, double b, double c) { - //return softMax(new double[]{a, b, c}); return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c); } +*/ - final static double approximateLog10SumLog10(double small, double big) { - // make sure small is really the smaller value - if ( small > big ) { - final double t = big; - big = small; - small = t; - } - - if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY ) - return big; - - if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE) - return big; - - // OK, so |y-x| < tol: we use the following identity then: - // we need to compute log10(10^x + 10^y) - // By Jacobian logarithm identity, this is equal to - // max(x,y) + log10(1+10^-abs(x-y)) - // we compute the second term as a table lookup - // with integer quantization - // we have pre-stored correction for 0,0.1,0.2,... 10.0 - //final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding - int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding - - //double z =Math.log10(1+Math.pow(10.0,-diff)); - //System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind); - return big + MathUtils.jacobianLogTable[ind]; - } - - - - /** - * Can be overridden by concrete subclasses - * @param vc variant context with genotype likelihoods - * @param log10AlleleFrequencyPosteriors allele frequency results - * @param AFofMaxLikelihood allele frequency of max likelihood - * - * @return calls - */ - public GenotypesContext assignGenotypes(VariantContext vc, - double[] log10AlleleFrequencyPosteriors, - int AFofMaxLikelihood) { - if ( !vc.isVariant() ) - throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart()); - - - GenotypesContext GLs = vc.getGenotypes(); - double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1]; - int[][] tracebackArray = new int[GLs.size()+1][AFofMaxLikelihood+1]; - - ArrayList sampleIndices = new ArrayList(); - int sampleIdx = 0; - - // todo - optimize initialization - for (int k=0; k <= AFofMaxLikelihood; k++) - for (int j=0; j <= GLs.size(); j++) - pathMetricArray[j][k] = -1e30; - - pathMetricArray[0][0] = 0.0; - - // todo = can't deal with optimal dynamic programming solution with multiallelic records - if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) { - sampleIndices.addAll(GLs.getSampleNamesOrderedByName()); - sampleIdx = GLs.size(); - } - else { - - for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) { - if ( !genotype.hasLikelihoods() ) - continue; - - double[] likelihoods = genotype.getLikelihoods().getAsVector(); - - if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) { - //System.out.print(sample.getKey()+":"); - //for (int k=0; k < likelihoods.length; k++) - // System.out.format("%4.2f ",likelihoods[k]); - //System.out.println(); - // all likelihoods are essentially the same: skip this sample and will later on force no call. - //sampleIdx++; - continue; - } - - sampleIndices.add(genotype.getSampleName()); - - for (int k=0; k <= AFofMaxLikelihood; k++) { - - double bestMetric = pathMetricArray[sampleIdx][k] + likelihoods[0]; - int bestIndex = k; - - if (k>0) { - double m2 = pathMetricArray[sampleIdx][k-1] + likelihoods[1]; - if (m2 > bestMetric) { - bestMetric = m2; - bestIndex = k-1; - } - } - - if (k>1) { - double m2 = pathMetricArray[sampleIdx][k-2] + likelihoods[2]; - if (m2 > bestMetric) { - bestMetric = m2; - bestIndex = k-2; - } - } - - pathMetricArray[sampleIdx+1][k] = bestMetric; - tracebackArray[sampleIdx+1][k] = bestIndex; - } - sampleIdx++; - } - } - - GenotypesContext calls = GenotypesContext.create(); - - int startIdx = AFofMaxLikelihood; - for (int k = sampleIdx; k > 0; k--) { - int bestGTguess; - String sample = sampleIndices.get(k-1); - Genotype g = GLs.get(sample); - if ( !g.hasLikelihoods() ) - continue; - // if all likelihoods are essentially the same: we want to force no-call. In this case, we skip this sample for now, - // and will add no-call genotype to GL's in a second pass - ArrayList myAlleles = new ArrayList(); - - double[] likelihoods = g.getLikelihoods().getAsVector(); - - if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) { - bestGTguess = Utils.findIndexOfMaxEntry(likelihoods); - } - else { - int newIdx = tracebackArray[k][startIdx];; - bestGTguess = startIdx - newIdx; - startIdx = newIdx; - } - - // likelihoods are stored row-wise in lower triangular matrix. IE - // for 2 alleles they have ordering AA,AB,BB - // for 3 alleles they are ordered AA,AB,BB,AC,BC,CC - // Get now alleles corresponding to best index - int kk=0; - boolean done = false; - for (int j=0; j < vc.getNAlleles(); j++) { - for (int i=0; i <= j; i++){ - if (kk++ == bestGTguess) { - if (i==0) - myAlleles.add(vc.getReference()); - else - myAlleles.add(vc.getAlternateAllele(i-1)); - - if (j==0) - myAlleles.add(vc.getReference()); - else - myAlleles.add(vc.getAlternateAllele(j-1)); - done = true; - break; - } - - } - if (done) - break; - } - - final double qual = GenotypeLikelihoods.getQualFromLikelihoods(bestGTguess, likelihoods); - //System.out.println(myAlleles.toString()); - calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false)); - } - - for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) { - if ( !genotype.hasLikelihoods() ) - continue; - - final Genotype g = GLs.get(genotype.getSampleName()); - final double[] likelihoods = genotype.getLikelihoods().getAsVector(); - - if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL) - continue; // regular likelihoods - - final double qual = Genotype.NO_LOG10_PERROR; - calls.replace(new Genotype(g.getSampleName(), NO_CALL_ALLELES, qual, null, g.getAttributes(), false)); - } - - return calls; - } - - private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) { - int j = logYMatrix.length - 1; - System.out.printf("-----------------------------------%n"); - for (int k=0; k <= numChr; k++) { - double posterior = logYMatrix[j][k] + log10AlleleFrequencyPriors[k]; - System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior); - } - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 62218416d..53600b145 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -153,6 +153,18 @@ public class UnifiedArgumentCollection { @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false) public boolean IGNORE_SNP_ALLELES = false; + @Hidden + @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow multiple alleles in discovery", required = false) + public boolean MULTI_ALLELIC = false; + + /** + * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES), + * then this site will be skipped and a warning printed. Note that genotyping sites with many alternate alleles is both CPU and memory intensive. + */ + @Hidden + @Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) + public int MAX_ALTERNATE_ALLELES = 5; + // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value! public UnifiedArgumentCollection clone() { @@ -176,10 +188,12 @@ public class UnifiedArgumentCollection { uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO; uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE; uac.alleles = alleles; + uac.MAX_ALTERNATE_ALLELES = MAX_ALTERNATE_ALLELES; // todo- arguments to remove uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES; uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION; + uac.MULTI_ALLELIC = MULTI_ALLELIC; return uac; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index c861af1a2..3a86743de 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -59,6 +59,9 @@ public class UnifiedGenotyperEngine { EMIT_ALL_SITES } + protected static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + protected static final double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. + // the unified argument collection private final UnifiedArgumentCollection UAC; public UnifiedArgumentCollection getUAC() { return UAC; } @@ -73,11 +76,12 @@ public class UnifiedGenotyperEngine { private ThreadLocal afcm = new ThreadLocal(); // because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything - private final double[] log10AlleleFrequencyPriorsSNPs; - private final double[] log10AlleleFrequencyPriorsIndels; + private final double[][] log10AlleleFrequencyPriorsSNPs; + private final double[][] log10AlleleFrequencyPriorsIndels; // the allele frequency likelihoods (allocated once as an optimization) - private ThreadLocal log10AlleleFrequencyPosteriors = new ThreadLocal(); + private ThreadLocal log10AlleleFrequencyLikelihoods = new ThreadLocal(); + private ThreadLocal log10AlleleFrequencyPosteriors = new ThreadLocal(); // the priors object private final GenotypePriors genotypePriorsSNPs; @@ -96,6 +100,7 @@ public class UnifiedGenotyperEngine { // the standard filter to use for calls below the confidence threshold but above the emit threshold private static final Set filter = new HashSet(1); + private final GenomeLocParser genomeLocParser; private final boolean BAQEnabledOnCMDLine; @@ -113,6 +118,7 @@ public class UnifiedGenotyperEngine { @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set samples) { this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; + genomeLocParser = toolkit.getGenomeLocParser(); this.samples = new TreeSet(samples); // note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ this.UAC = UAC.clone(); @@ -122,10 +128,10 @@ public class UnifiedGenotyperEngine { this.annotationEngine = engine; N = 2 * this.samples.size(); - log10AlleleFrequencyPriorsSNPs = new double[N+1]; - log10AlleleFrequencyPriorsIndels = new double[N+1]; - computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, GenotypeLikelihoodsCalculationModel.Model.SNP); - computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, GenotypeLikelihoodsCalculationModel.Model.INDEL); + log10AlleleFrequencyPriorsSNPs = new double[UAC.MAX_ALTERNATE_ALLELES][N+1]; + log10AlleleFrequencyPriorsIndels = new double[UAC.MAX_ALTERNATE_ALLELES][N+1]; + computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity); + computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY); genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP); genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL); @@ -152,7 +158,6 @@ public class UnifiedGenotyperEngine { } VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model); - if ( vc == null ) return null; @@ -290,102 +295,170 @@ public class UnifiedGenotyperEngine { return new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles).genotypes(genotypes).referenceBaseForIndel(refContext.getBase()).make(); } - // private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine - private VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { + public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { + return calculateGenotypes(null, null, null, null, vc, model); + } + + public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { + + boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null; // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { - log10AlleleFrequencyPosteriors.set(new double[N+1]); + log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); + log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } + // don't try to genotype too many alternate alleles + if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) { + logger.warn("the Unified Genotyper is currently set to genotype at most " + UAC.MAX_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles; see the --max_alternate_alleles argument"); + return null; + } + // estimate our confidence in a reference call and return - if ( vc.getNSamples() == 0 ) + if ( vc.getNSamples() == 0 ) { + if ( limitedContext ) + return null; return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ? estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), false, 1.0) : generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext)); + } // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); - // find the most likely frequency - int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()); + // is the most likely frequency conformation AC=0 for all alternate alleles? + boolean bestGuessIsRef = true; + + // which alternate allele has the highest MLE AC? + int indexOfHighestAlt = -1; + int alleleCountOfHighestAlt = -1; + + // determine which alternate alleles have AF>0 + boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()]; + for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { + int indexOfBestAC = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[i]); + + // if the most likely AC is not 0, then this is a good alternate allele to use + if ( indexOfBestAC != 0 ) { + altAllelesToUse[i] = true; + bestGuessIsRef = false; + } + // if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele + else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + altAllelesToUse[i] = true; + } + + // keep track of the "best" alternate allele to use + if ( indexOfBestAC > alleleCountOfHighestAlt) { + alleleCountOfHighestAlt = indexOfBestAC; + indexOfHighestAlt = i; + } + } // calculate p(f>0) - double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()); + // TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs + double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[indexOfHighestAlt]); double sum = 0.0; for (int i = 1; i <= N; i++) sum += normalizedPosteriors[i]; double PofF = Math.min(sum, 1.0); // deal with precision errors double phredScaledConfidence; - if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0]; + phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0]; } else { phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); if ( Double.isInfinite(phredScaledConfidence) ) { sum = 0.0; for (int i = 1; i <= N; i++) { - if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) + if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) break; - sum += log10AlleleFrequencyPosteriors.get()[i]; + sum += log10AlleleFrequencyPosteriors.get()[0][i]; } phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); } } // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero - if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) { + if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef) ) { // technically, at this point our confidence in a reference call isn't accurately estimated // because it didn't take into account samples with no data, so let's get a better estimate - return estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF); + return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF); } + // strip out any alleles that aren't going to be used in the VariantContext + List myAlleles; + if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { + myAlleles = new ArrayList(vc.getAlleles().size()); + myAlleles.add(vc.getReference()); + for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { + if ( altAllelesToUse[i] ) + myAlleles.add(vc.getAlternateAllele(i)); + } + } else { + // use all of the alleles if we are given them by the user + myAlleles = vc.getAlleles(); + } + + // start constructing the resulting VC + GenomeLoc loc = genomeLocParser.createGenomeLoc(vc); + VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles); + builder.log10PError(phredScaledConfidence/-10.0); + if ( ! passesCallThreshold(phredScaledConfidence) ) + builder.filters(filter); + if ( !limitedContext ) + builder.referenceBaseForIndel(refContext.getBase()); + // create the genotypes - GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess); + GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse); // print out stats if we have a writer - if ( verboseWriter != null ) + if ( verboseWriter != null && !limitedContext ) printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model); // *** note that calculating strand bias involves overwriting data structures, so we do that last HashMap attributes = new HashMap(); // if the site was downsampled, record that fact - if ( rawContext.hasPileupBeenDownsampled() ) + if ( !limitedContext && rawContext.hasPileupBeenDownsampled() ) attributes.put(VCFConstants.DOWNSAMPLED_KEY, true); - - if ( UAC.COMPUTE_SLOD && bestAFguess != 0 ) { + if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) { //final boolean DEBUG_SLOD = false; // the overall lod VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model); + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); + double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); // the forward lod VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model); + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); - double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); + double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; + double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model); + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); - double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); + double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; + double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; @@ -401,26 +474,12 @@ public class UnifiedGenotyperEngine { attributes.put("SB", strandScore); } - GenomeLoc loc = refContext.getLocus(); - - int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc); - - Set myAlleles = new HashSet(vc.getAlleles()); - // strip out the alternate allele if it's a ref call - if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { - myAlleles = new HashSet(1); - myAlleles.add(vc.getReference()); - } - - VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles); + // finish constructing the resulting VC builder.genotypes(genotypes); - builder.log10PError(phredScaledConfidence/-10.0); - if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter); builder.attributes(attributes); - builder.referenceBaseForIndel(refContext.getBase()); VariantContext vcCall = builder.make(); - if ( annotationEngine != null ) { + if ( annotationEngine != null && !limitedContext ) { // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations ReadBackedPileup pileup = null; if (rawContext.hasExtendedEventPileup()) @@ -435,83 +494,6 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } - // A barebones entry point to the exact model when there is no tracker or stratified contexts available -- only GLs - public VariantCallContext calculateGenotypes(final VariantContext vc, final GenomeLoc loc, final GenotypeLikelihoodsCalculationModel.Model model) { - - // initialize the data for this thread if that hasn't been done yet - if ( afcm.get() == null ) { - log10AlleleFrequencyPosteriors.set(new double[N+1]); - afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); - } - - // estimate our confidence in a reference call and return - if ( vc.getNSamples() == 0 ) - return null; - - // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) - clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); - - // find the most likely frequency - int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()); - - // calculate p(f>0) - double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()); - double sum = 0.0; - for (int i = 1; i <= N; i++) - sum += normalizedPosteriors[i]; - double PofF = Math.min(sum, 1.0); // deal with precision errors - - double phredScaledConfidence; - if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { - phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); - if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0]; - } else { - phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); - if ( Double.isInfinite(phredScaledConfidence) ) { - sum = 0.0; - for (int i = 1; i <= N; i++) { - if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) - break; - sum += log10AlleleFrequencyPosteriors.get()[i]; - } - phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); - } - } - - // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero - if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) { - // technically, at this point our confidence in a reference call isn't accurately estimated - // because it didn't take into account samples with no data, so let's get a better estimate - return null; - } - - // create the genotypes - GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess); - - // *** note that calculating strand bias involves overwriting data structures, so we do that last - HashMap attributes = new HashMap(); - - int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc); - - Set myAlleles = new HashSet(vc.getAlleles()); - // strip out the alternate allele if it's a ref call - if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { - myAlleles = new HashSet(1); - myAlleles.add(vc.getReference()); - } - - VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles); - builder.genotypes(genotypes); - builder.log10PError(phredScaledConfidence/-10.0); - if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter); - builder.attributes(attributes); - builder.referenceBaseForIndel(vc.getReferenceBaseForIndel()); - - return new VariantCallContext(builder.make(), confidentlyCalled(phredScaledConfidence, PofF)); - } - private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { // TODO - temp fix until we can deal with extended events properly // for indels, stop location is one more than ref allele length @@ -604,9 +586,12 @@ public class UnifiedGenotyperEngine { return stratifiedContexts; } - protected static void clearAFarray(double[] AFs) { - for ( int i = 0; i < AFs.length; i++ ) - AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; + protected static void clearAFarray(double[][] AFs) { + for ( int i = 0; i < AFs.length; i++ ) { + for ( int j = 0; j < AFs[i].length; j++ ) { + AFs[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; + } + } } private final static double[] binomialProbabilityDepthCache = new double[10000]; @@ -676,7 +661,7 @@ public class UnifiedGenotyperEngine { AFline.append(i + "/" + N + "\t"); AFline.append(String.format("%.2f\t", ((float)i)/N)); AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i])); - if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED) + if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED) AFline.append("0.00000000\t"); else AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i])); @@ -689,8 +674,8 @@ public class UnifiedGenotyperEngine { verboseWriter.println(); } - protected boolean passesEmitThreshold(double conf, int bestAFguess) { - return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING); + protected boolean passesEmitThreshold(double conf, boolean bestGuessIsRef) { + return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || !bestGuessIsRef) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING); } protected boolean passesCallThreshold(double conf) { @@ -744,27 +729,25 @@ public class UnifiedGenotyperEngine { return null; } - protected void computeAlleleFrequencyPriors(int N, final double[] priors, final GenotypeLikelihoodsCalculationModel.Model model) { - // calculate the allele frequency priors for 1-N - double sum = 0.0; - double heterozygosity; + protected static void computeAlleleFrequencyPriors(final int N, final double[][] priors, final double theta) { - if (model == GenotypeLikelihoodsCalculationModel.Model.INDEL) - heterozygosity = UAC.INDEL_HETEROZYGOSITY; - else - heterozygosity = UAC.heterozygosity; - - for (int i = 1; i <= N; i++) { - double value = heterozygosity / (double)i; - priors[i] = Math.log10(value); - sum += value; + // the dimension here is the number of alternate alleles; with e.g. 2 alternate alleles the prior will be theta^2 / i + for (int alleles = 1; alleles <= priors.length; alleles++) { + double sum = 0.0; + + // for each i + for (int i = 1; i <= N; i++) { + double value = Math.pow(theta, alleles) / (double)i; + priors[alleles-1][i] = Math.log10(value); + sum += value; + } + + // null frequency for AF=0 is (1 - sum(all other frequencies)) + priors[alleles-1][0] = Math.log10(1.0 - sum); } - - // null frequency for AF=0 is (1 - sum(all other frequencies)) - priors[0] = Math.log10(1.0 - sum); } - protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { + protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { switch( model ) { case SNP: return log10AlleleFrequencyPriorsSNPs; @@ -837,4 +820,88 @@ public class UnifiedGenotyperEngine { return vc; } + + /** + * @param vc variant context with genotype likelihoods + * @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use + * + * @return genotypes + */ + public GenotypesContext assignGenotypes(VariantContext vc, + boolean[] allelesToUse) { + + final GenotypesContext GLs = vc.getGenotypes(); + + final List sampleIndices = GLs.getSampleNamesOrderedByName(); + + final GenotypesContext calls = GenotypesContext.create(); + + for ( int k = GLs.size() - 1; k >= 0; k-- ) { + final String sample = sampleIndices.get(k); + final Genotype g = GLs.get(sample); + if ( !g.hasLikelihoods() ) + continue; + + final double[] likelihoods = g.getLikelihoods().getAsVector(); + + // if there is no mass on the likelihoods, then just no-call the sample + if ( MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL ) { + calls.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false)); + continue; + } + + // genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods. + // so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD. + + final int numAltAlleles = allelesToUse.length; + + // start with the assumption that the ideal genotype is homozygous reference + Allele maxAllele1 = vc.getReference(), maxAllele2 = vc.getReference(); + double maxLikelihoodSeen = likelihoods[0]; + int indexOfMax = 0; + + // keep track of some state + Allele firstAllele = vc.getReference(); + int subtractor = numAltAlleles + 1; + int subtractionsMade = 0; + + for ( int i = 1, PLindex = 1; i < likelihoods.length; i++, PLindex++ ) { + if ( PLindex == subtractor ) { + firstAllele = vc.getAlternateAllele(subtractionsMade); + PLindex -= subtractor; + subtractor--; + subtractionsMade++; + + // we can skip this allele if it's not usable + if ( !allelesToUse[subtractionsMade-1] ) { + i += subtractor - 1; + PLindex += subtractor - 1; + continue; + } + } + + // we don't care about the entry if we've already seen better + if ( likelihoods[i] <= maxLikelihoodSeen ) + continue; + + // if it's usable then update the alleles + int alleleIndex = subtractionsMade + PLindex - 1; + if ( allelesToUse[alleleIndex] ) { + maxAllele1 = firstAllele; + maxAllele2 = vc.getAlternateAllele(alleleIndex); + maxLikelihoodSeen = likelihoods[i]; + indexOfMax = i; + } + } + + ArrayList myAlleles = new ArrayList(); + myAlleles.add(maxAllele1); + myAlleles.add(maxAllele2); + + final double qual = GenotypeLikelihoods.getQualFromLikelihoods(indexOfMax, likelihoods); + calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false)); + } + + return calls; + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java new file mode 100644 index 000000000..9640a8963 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java @@ -0,0 +1,104 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; + + +public class ExactAFCalculationModelUnitTest extends BaseTest { + + static double[] AA1, AB1, BB1; + static double[] AA2, AB2, AC2, BB2, BC2, CC2; + static final int numSamples = 3; + static double[][] priors = new double[2][2*numSamples+1]; // flat priors + + @BeforeSuite + public void before() { + AA1 = new double[]{0.0, -20.0, -20.0}; + AB1 = new double[]{-20.0, 0.0, -20.0}; + BB1 = new double[]{-20.0, -20.0, 0.0}; + AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0}; + AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0}; + AC2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0}; + BB2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0}; + BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0}; + CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0}; + } + + private class GetGLsTest extends TestDataProvider { + GenotypesContext GLs; + int numAltAlleles; + String name; + + private GetGLsTest(String name, int numAltAlleles, Genotype... arg) { + super(GetGLsTest.class, name); + GLs = GenotypesContext.create(arg); + this.name = name; + this.numAltAlleles = numAltAlleles; + } + + public String toString() { + return String.format("%s input=%s", super.toString(), GLs); + } + } + + private static Genotype createGenotype(String name, double[] gls) { + return new Genotype(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), Genotype.NO_LOG10_PERROR, gls); + } + + @DataProvider(name = "getGLs") + public Object[][] createGLsData() { + + // bi-allelic case + new GetGLsTest("B0", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AA3", AA1)); + new GetGLsTest("B1", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AB", AB1)); + new GetGLsTest("B2", 1, createGenotype("AA1", AA1), createGenotype("BB", BB1), createGenotype("AA2", AA1)); + new GetGLsTest("B3a", 1, createGenotype("AB", AB1), createGenotype("AA", AA1), createGenotype("BB", BB1)); + new GetGLsTest("B3b", 1, createGenotype("AB1", AB1), createGenotype("AB2", AB1), createGenotype("AB3", AB1)); + new GetGLsTest("B4", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("AA", AA1)); + new GetGLsTest("B5", 1, createGenotype("BB1", BB1), createGenotype("AB", AB1), createGenotype("BB2", BB1)); + new GetGLsTest("B6", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("BB3", BB1)); + + // tri-allelic case + new GetGLsTest("B1C0", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AB", AB2)); + new GetGLsTest("B0C1", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AC", AC2)); + new GetGLsTest("B1C1a", 2, createGenotype("AA", AA2), createGenotype("AB", AB2), createGenotype("AC", AC2)); + new GetGLsTest("B1C1b", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("BC", BC2)); + new GetGLsTest("B2C1", 2, createGenotype("AB1", AB2), createGenotype("AB2", AB2), createGenotype("AC", AC2)); + new GetGLsTest("B3C2a", 2, createGenotype("AB", AB2), createGenotype("BC1", BC2), createGenotype("BC2", BC2)); + new GetGLsTest("B3C2b", 2, createGenotype("AB", AB2), createGenotype("BB", BB2), createGenotype("CC", CC2)); + + return GetGLsTest.getTests(GetGLsTest.class); + } + + + @Test(dataProvider = "getGLs") + public void testGLs(GetGLsTest cfg) { + + final double[][] log10AlleleFrequencyLikelihoods = new double[2][2*numSamples+1]; + final double[][] log10AlleleFrequencyPosteriors = new double[2][2*numSamples+1]; + for ( int i = 0; i < 2; i++ ) { + for ( int j = 0; j < 2*numSamples+1; j++ ) { + log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; + log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; + } + } + + ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false); + + int nameIndex = 1; + for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { + int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1)); + int calculatedAlleleCount = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors[allele]); + Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 34e1ad30e..4ae00431c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -7,7 +7,6 @@ import org.testng.annotations.Test; import java.util.Arrays; import java.util.HashMap; -import java.util.List; import java.util.Map; // ********************************************************************************** // @@ -29,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("286f0de92e4ce57986ba861390c6019d")); + Arrays.asList("a2d3839c4ebb390b0012d495e4e53b3a")); executeTest("test MultiSample Pilot1", spec); } @@ -45,7 +44,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("d0593483e85a7d815f4c5ee6db284d2a")); + Arrays.asList("43e7a17d95b1a0cf72e669657794d802")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -53,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("3ccce5d909f8f128e496f6841836e5f7")); + Arrays.asList("ae29b9c9aacce8046dc780430540cd62")); executeTest("test SingleSample Pilot2", spec); } @@ -63,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "890143b366050e78d6c6ba6b2c6b6864"; + private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65"; @Test public void testCompressedOutput() { @@ -84,7 +83,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "95614280c565ad90f8c000376fef822c"; + String md5 = "32a34362dff51d8b73a3335048516d82"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -165,8 +164,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "46243ecc2b9dc716f48ea280c9bb7e72" ); - e.put( 1.0 / 1850, "6b2a59dbc76984db6d4d6d6b5ee5d62c" ); + e.put( 0.01, "2cb2544739e01f6c08fd820112914317" ); + e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -276,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("6e182a58472ea17c8b0eb01f80562fbd")); + Arrays.asList("45633d905136c86e9d3f90ce613255e5")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } @@ -286,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("f93f8a35b47bcf96594ada55e2312c73")); + Arrays.asList("98a4d1e1e0a363ba37518563ac6cbead")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @@ -295,8 +294,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("9be28cb208d8b0314d2bc2696e2fd8d4")); - executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4); + Arrays.asList("915e7a3e7cbfd995dbc41fdd382d0d51")); + executeTest("test MultiSample Phase1 indels with complicated records", spec4); } @Test diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/CigarStringTestPair.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/CigarStringTestPair.java new file mode 100644 index 000000000..cc9021fae --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/CigarStringTestPair.java @@ -0,0 +1,18 @@ +package org.broadinstitute.sting.utils.clipreads; + +/** + * Created by IntelliJ IDEA. + * User: roger + * Date: 11/29/11 + * Time: 4:53 PM + * To change this template use File | Settings | File Templates. + */ +public class CigarStringTestPair { + public String toTest; + public String expected; + + public CigarStringTestPair(String ToTest, String Expected) { + this.toTest = ToTest; + this.expected = Expected; + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java new file mode 100644 index 000000000..a5524e6f1 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java @@ -0,0 +1,185 @@ +package org.broadinstitute.sting.utils.clipreads; + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.TextCigarCodec; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.Assert; + +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: roger + * Date: 11/27/11 + * Time: 6:45 AM + * To change this template use File | Settings | File Templates. + */ +public class ClipReadsTestUtils { + //Should contain all the utils needed for tests to mass produce + //reads, cigars, and other needed classes + + final static String BASES = "ACTG"; + final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63 + + public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) { + // Because quals to char start at 33 for visibility + baseQuals = subtractToArray(baseQuals, 33); + + Assert.assertEquals(read.getReadBases(), readBases); + Assert.assertEquals(read.getBaseQualities(), baseQuals); + Assert.assertEquals(read.getCigarString(), cigar); + } + + public static void testCigar(GATKSAMRecord read, String cigar) { + Assert.assertEquals(read.getCigarString(), cigar); + } + + public static void testBaseQual(GATKSAMRecord read, byte[] readBases, byte[] baseQuals) { + // Because quals to chars start at 33 for visibility + baseQuals = subtractToArray(baseQuals, 33); + + if (readBases.length > 0 && baseQuals.length > 0) { + Assert.assertEquals(read.getReadBases(), readBases); + Assert.assertEquals(read.getBaseQualities(), baseQuals); + } else + Assert.assertTrue(read.isEmpty()); + } + + private static byte[] subtractToArray(byte[] array, int n) { + if (array == null) + return null; + + byte[] output = new byte[array.length]; + + for (int i = 0; i < array.length; i++) + output[i] = (byte) (array[i] - n); + + return output; + } + + // What the test read looks like + // Ref: 10 11 12 13 14 15 16 17 + // Read: 0 1 2 3 - - - - + // -------------------------------- + // Bases: A C T G - - - - + // Quals: ! + 5 ? - - - - + + public static GATKSAMRecord makeRead() { + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, BASES.length()); + output.setReadBases(new String(BASES).getBytes()); + output.setBaseQualityString(new String(QUALS)); + + return output; + } + + public static GATKSAMRecord makeReadFromCigar(Cigar cigar) { + + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, cigar.getReadLength()); + output.setReadBases(cycleString(BASES, cigar.getReadLength()).getBytes()); + output.setBaseQualityString(cycleString(QUALS, cigar.getReadLength())); + output.setCigar(cigar); + + return output; + } + + private static String cycleString(String string, int length) { + String output = ""; + int cycles = (length / string.length()) + 1; + + for (int i = 1; i < cycles; i++) + output += string; + + for (int j = 0; output.length() < length; j++) + output += string.charAt(j % string.length()); + + return output; + } + + public static Set generateCigars() { + + // This function generates every permutation of cigar strings we need. + + LinkedHashSet output = new LinkedHashSet(); + + List clippingOptionsStart = new LinkedList(); + clippingOptionsStart.add(new Cigar()); + clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H1S")); + clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1S")); + clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H")); + + LinkedList clippingOptionsEnd = new LinkedList(); + clippingOptionsEnd.add(new Cigar()); + clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S1H")); + clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S")); + clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1H")); + + + LinkedList indelOptions1 = new LinkedList(); + indelOptions1.add(new Cigar()); + //indelOptions1.add( TextCigarCodec.getSingleton().decode("1I1D")); + //indelOptions1.add( TextCigarCodec.getSingleton().decode("1D1I") ); + indelOptions1.add(TextCigarCodec.getSingleton().decode("1I")); + indelOptions1.add(TextCigarCodec.getSingleton().decode("1D")); + + LinkedList indelOptions2 = new LinkedList(); + indelOptions2.add(new Cigar()); + indelOptions2.add(TextCigarCodec.getSingleton().decode("1I")); + indelOptions2.add(null); + + + // Start With M as base CigarElements, M, + + LinkedList base = new LinkedList(); + base.add(TextCigarCodec.getSingleton().decode("1M")); + base.add(TextCigarCodec.getSingleton().decode("5M")); + base.add(TextCigarCodec.getSingleton().decode("25M")); + // Should indel be added as a base? + + // Nested loops W00t! + for (Cigar Base : base) { + for (Cigar indelStart : indelOptions1) { + for (Cigar indelEnd : indelOptions2) { + for (Cigar clipStart : clippingOptionsStart) { + for (Cigar clipEnd : clippingOptionsEnd) { + // Create a list of Cigar Elements and construct Cigar + List CigarBuilder = new ArrayList(); + // add starting clipping (H/S) + CigarBuilder.addAll(clipStart.getCigarElements()); + // add first base (M) + CigarBuilder.addAll(Base.getCigarElements()); + // add first indel + CigarBuilder.addAll(indelStart.getCigarElements()); + // add second base (M) + CigarBuilder.addAll(Base.getCigarElements()); + // add another indel or nothing (M) + if (indelEnd != null) + CigarBuilder.addAll(indelEnd.getCigarElements()); + // add final clipping (S/H) + CigarBuilder.addAll(clipEnd.getCigarElements()); + + + output.add(new Cigar(removeConsecutiveElements(CigarBuilder))); + + } + } + } + } + } + + return output; + } + + private static List removeConsecutiveElements(List cigarBuilder) { + LinkedList output = new LinkedList(); + for (CigarElement E : cigarBuilder) { + if (output.isEmpty() || output.getLast().getOperator() != E.getOperator()) + output.add(E); + } + return output; + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java new file mode 100644 index 000000000..719d04287 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java @@ -0,0 +1,69 @@ +package org.broadinstitute.sting.utils.clipreads; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import java.util.LinkedList; +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: roger + * Date: 11/27/11 + * Time: 5:17 AM + * To change this template use File | Settings | File Templates. + */ +public class ClippingOpUnitTest extends BaseTest { + + ClippingOp clippingOp; + GATKSAMRecord read; + + @BeforeTest + public void init() { + read = ClipReadsTestUtils.makeRead(); + } + + @Test + private void testHardClip() { + List testList = new LinkedList(); + testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start + testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end + testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start + testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end + testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start + testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end + + for (TestParameter p : testList) { + init(); + clippingOp = new ClippingOp(p.inputStart, p.inputStop); + logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar); + ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); + } + + } + + @Test + private void testSoftClip() { + List testList = new LinkedList(); + testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start + testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end + testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start + testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end + testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start + testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end + + for (TestParameter p : testList) { + init(); + clippingOp = new ClippingOp(p.inputStart, p.inputStop); + logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar); + ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read), + ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar); + } + + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index ecb5a6d33..ff33e3184 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -25,12 +25,16 @@ package org.broadinstitute.sting.utils.clipreads; -import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.TextCigarCodec; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; -import org.testng.annotations.*; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; import java.util.LinkedList; import java.util.List; @@ -44,188 +48,207 @@ import java.util.List; */ public class ReadClipperUnitTest extends BaseTest { - // TODO: Add error messages on failed tests + // TODO: exception testing, make cases that should fail will fail - //int debug = 0; + // TODO: add indels to all test cases - GATKSAMRecord read, expected; ReadClipper readClipper; - final static String BASES = "ACTG"; - final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63 - - - public void testIfEqual( GATKSAMRecord read, byte[] readBases, String baseQuals, String cigar) { - Assert.assertEquals(read.getReadBases(), readBases); - Assert.assertEquals(read.getBaseQualityString(), baseQuals); - Assert.assertEquals(read.getCigarString(), cigar); - } - - public class testParameter { - int inputStart; - int inputStop; - int substringStart; - int substringStop; - String cigar; - - public testParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) { - inputStart = InputStart; - inputStop = InputStop; - substringStart = SubstringStart; - substringStop = SubstringStop; - cigar = Cigar; - } - } - - // What the test read looks like - // Ref: 1 2 3 4 5 6 7 8 - // Read: 0 1 2 3 - - - - - // ----------------------------- - // Bases: A C T G - - - - - // Quals: ! + 5 ? - - - - @BeforeMethod public void init() { - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length()); - read.setReadBases(new String(BASES).getBytes()); - read.setBaseQualityString(new String(QUALS)); - - readClipper = new ReadClipper(read); - //logger.warn(read.getCigarString()); + readClipper = new ReadClipper(ClipReadsTestUtils.makeRead()); } - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipBothEndsByReferenceCoordinates() { logger.warn("Executing testHardClipBothEndsByReferenceCoordinates"); //int debug = 1; //Clip whole read - Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(1,1), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(10, 10), new GATKSAMRecord(readClipper.read.getHeader())); //clip 1 base - expected = readClipper.hardClipBothEndsByReferenceCoordinates(1,4); - Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes()); - Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3)); - Assert.assertEquals(expected.getCigarString(), "1H2M1H"); + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipBothEndsByReferenceCoordinates(10, 13), + ClipReadsTestUtils.BASES.substring(1, 3).getBytes(), ClipReadsTestUtils.QUALS.substring(1, 3).getBytes(), + "1H2M1H"); + List cigarStringTestPairs = new LinkedList(); + cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I99M1H")); + //cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1H")); + cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1H")); + cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1H")); + + for (CigarStringTestPair pair : cigarStringTestPairs) { + readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest))); + ClipReadsTestUtils.testCigar(readClipper.hardClipBothEndsByReferenceCoordinates( + ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read), + ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read)), + pair.expected); + } + /* + for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) { + // The read has to be long enough to clip one base from each side + // This also filters a lot of cigars + if ( cigar.getReadLength() > 26 ) { + readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar )); + System.out.println( "Testing Cigar: "+cigar.toString() ) ; + //cigar length reference plus soft clip + + ClipReadsTestUtils.testBaseQual( + readClipper.hardClipBothEndsByReferenceCoordinates( + ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read), + ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read) ), + readClipper.read.getReadString().substring(1, (cigar.getReadLength() - 1)).getBytes(), + readClipper.read.getBaseQualityString().substring(1, (cigar.getReadLength() - 1)).getBytes()); + } + } + */ } - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipByReadCoordinates() { logger.warn("Executing testHardClipByReadCoordinates"); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReadCoordinates(0, 3), new GATKSAMRecord(readClipper.read.getHeader())); - List testList = new LinkedList(); - testList.add(new testParameter(0,0,1,4,"1H3M"));//clip 1 base at start - testList.add(new testParameter(3,3,0,3,"3M1H"));//clip 1 base at end - testList.add(new testParameter(0,1,2,4,"2H2M"));//clip 2 bases at start - testList.add(new testParameter(2,3,0,2,"2M2H"));//clip 2 bases at end + List testList = new LinkedList(); + testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start + testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end + testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start + testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end - for ( testParameter p : testList ) { + for (TestParameter p : testList) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop), - BASES.substring(p.substringStart,p.substringStop).getBytes(), - QUALS.substring(p.substringStart,p.substringStop), - p.cigar ); + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); } } - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipByReferenceCoordinates() { logger.warn("Executing testHardClipByReferenceCoordinates"); //logger.warn(debug); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(10, 13), new GATKSAMRecord(readClipper.read.getHeader())); - List testList = new LinkedList(); - testList.add(new testParameter(-1,1,1,4,"1H3M"));//clip 1 base at start - testList.add(new testParameter(4,-1,0,3,"3M1H"));//clip 1 base at end - testList.add(new testParameter(-1,2,2,4,"2H2M"));//clip 2 bases at start - testList.add(new testParameter(3,-1,0,2,"2M2H"));//clip 2 bases at end + List testList = new LinkedList(); + testList.add(new TestParameter(-1, 10, 1, 4, "1H3M"));//clip 1 base at start + testList.add(new TestParameter(13, -1, 0, 3, "3M1H"));//clip 1 base at end + testList.add(new TestParameter(-1, 11, 2, 4, "2H2M"));//clip 2 bases at start + testList.add(new TestParameter(12, -1, 0, 2, "2M2H"));//clip 2 bases at end - for ( testParameter p : testList ) { + for (TestParameter p : testList) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipByReferenceCoordinates(p.inputStart,p.inputStop), - BASES.substring(p.substringStart,p.substringStop).getBytes(), - QUALS.substring(p.substringStart,p.substringStop), - p.cigar ); + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinates(p.inputStart, p.inputStop), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); } + List cigarStringTestPairs = new LinkedList(); + cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I100M")); + //cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1M")); + cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1S")); + cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1S")); + + //Clips only first base + for (CigarStringTestPair pair : cigarStringTestPairs) { + readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest))); + ClipReadsTestUtils.testCigar(readClipper.hardClipByReadCoordinates(0, 0), pair.expected); + } + /* + for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) { + // The read has to be long enough to clip one base + // This also filters a lot of cigars + if ( cigar.getReadLength() > 26 ) { + readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar )); + System.out.println( "Testing Cigar: "+cigar.toString() ) ; + //cigar length reference plus soft clip + + // Clip first read + ClipReadsTestUtils.testBaseQual( + readClipper.hardClipByReadCoordinates(0,0), + readClipper.read.getReadString().substring(1, cigar.getReadLength()).getBytes(), + readClipper.read.getBaseQualityString().substring(1, cigar.getReadLength()).getBytes()); + } + } + */ } - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipByReferenceCoordinatesLeftTail() { init(); logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail"); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(13), new GATKSAMRecord(readClipper.read.getHeader())); - List testList = new LinkedList(); - testList.add(new testParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start - testList.add(new testParameter(2, -1, 2, 4, "2H2M"));//clip 2 bases at start + List testList = new LinkedList(); + testList.add(new TestParameter(10, -1, 1, 4, "1H3M"));//clip 1 base at start + testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start - for ( testParameter p : testList ) { + for (TestParameter p : testList) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart), - BASES.substring(p.substringStart,p.substringStop).getBytes(), - QUALS.substring(p.substringStart,p.substringStop), - p.cigar ); + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); } } - @Test ( enabled = true ) + @Test(enabled = true) public void testHardClipByReferenceCoordinatesRightTail() { init(); logger.warn("Executing testHardClipByReferenceCoordinatesRightTail"); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(10), new GATKSAMRecord(readClipper.read.getHeader())); - List testList = new LinkedList(); - testList.add(new testParameter(-1, 4, 0, 3, "3M1H"));//clip 1 base at end - testList.add(new testParameter(-1, 3, 0, 2, "2M2H"));//clip 2 bases at end + List testList = new LinkedList(); + testList.add(new TestParameter(-1, 13, 0, 3, "3M1H"));//clip 1 base at end + testList.add(new TestParameter(-1, 12, 0, 2, "2M2H"));//clip 2 bases at end - for ( testParameter p : testList ) { + for (TestParameter p : testList) { init(); //logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop), - BASES.substring(p.substringStart,p.substringStop).getBytes(), - QUALS.substring(p.substringStart,p.substringStop), - p.cigar ); + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); } } - @Test ( enabled = true ) // TODO This function is returning null reads + @Test(enabled = true) public void testHardClipLowQualEnds() { - + // Needs a thorough redesign logger.warn("Executing testHardClipByReferenceCoordinates"); //Clip whole read - Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new GATKSAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipLowQualEnds((byte) 64), new GATKSAMRecord(readClipper.read.getHeader())); - List testList = new LinkedList(); - testList.add(new testParameter(1,-1,1,4,"1H3M"));//clip 1 base at start - testList.add(new testParameter(11,-1,2,4,"2H2M"));//clip 2 bases at start + List testList = new LinkedList(); + testList.add(new TestParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start + testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start - for ( testParameter p : testList ) { + for (TestParameter p : testList) { init(); //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ), - BASES.substring(p.substringStart,p.substringStop).getBytes(), - QUALS.substring(p.substringStart,p.substringStop), - p.cigar ); + ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart), + ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), + ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), + p.cigar); } - /* todo find a better way to test lowqual tail clipping on both sides + /* todo find a better way to test lowqual tail clipping on both sides // Reverse Quals sequence readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33 @@ -237,7 +260,7 @@ public class ReadClipperUnitTest extends BaseTest { init(); readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33 //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar); - testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ), + testBaseQualCigar( readClipper.hardClipLowQualEnds( (byte)p.inputStart ), BASES.substring(p.substringStart,p.substringStop).getBytes(), QUALS.substring(p.substringStart,p.substringStop), p.cigar ); @@ -245,15 +268,77 @@ public class ReadClipperUnitTest extends BaseTest { */ } - public class CigarReadMaker { - - } - - @Test ( enabled = false ) + @Test(enabled = false) public void testHardClipSoftClippedBases() { // Generate a list of cigars to test + for (Cigar cigar : ClipReadsTestUtils.generateCigars()) { + //logger.warn("Testing Cigar: "+cigar.toString()); + readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(cigar)); + + int clipStart = 0; + int clipEnd = 0; + boolean expectEmptyRead = false; + + List cigarElements = cigar.getCigarElements(); + int CigarListLength = cigarElements.size(); + + // It will know what needs to be clipped based on the start and end of the string, hardclips and softclips + // are added to the amount to clip + if (cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP) { + //clipStart += cigarElements.get(0).getLength(); + if (cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP) { + clipStart += cigarElements.get(1).getLength(); + // Check for leading indel + if (cigarElements.get(2).getOperator() == CigarOperator.INSERTION) { + expectEmptyRead = true; + } + } + // Check for leading indel + else if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) { + expectEmptyRead = true; + } + } else if (cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP) { + clipStart += cigarElements.get(0).getLength(); + // Check for leading indel + if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) { + expectEmptyRead = true; + } + } + //Check for leading indel + else if (cigarElements.get(0).getOperator() == CigarOperator.INSERTION) { + expectEmptyRead = true; + } + + if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP) { + //clipEnd += cigarElements.get(CigarListLength - 1).getLength(); + if (cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP) + clipEnd += cigarElements.get(CigarListLength - 2).getLength(); + } else if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP) + clipEnd += cigarElements.get(CigarListLength - 1).getLength(); + + String readBases = readClipper.read.getReadString(); + String baseQuals = readClipper.read.getBaseQualityString(); + + // "*" is the default empty-sequence-string and for our test it needs to be changed to "" + if (readBases.equals("*")) + readBases = ""; + if (baseQuals.equals("*")) + baseQuals = ""; + + logger.warn(String.format("Testing cigar %s, expecting Base: %s and Qual: %s", + cigar.toString(), readBases.substring(clipStart, readBases.length() - clipEnd), + baseQuals.substring(clipStart, baseQuals.length() - clipEnd))); + //if (expectEmptyRead) + // testBaseQual( readClipper.hardClipSoftClippedBases(), new byte[0], new byte[0] ); + //else + ClipReadsTestUtils.testBaseQual(readClipper.hardClipSoftClippedBases(), + readBases.substring(clipStart, readBases.length() - clipEnd).getBytes(), + baseQuals.substring(clipStart, baseQuals.length() - clipEnd).getBytes()); + logger.warn("Cigar: " + cigar.toString() + " PASSED!"); + } // We will use testParameter in the following way // Right tail, left tail, + } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/TestParameter.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/TestParameter.java new file mode 100644 index 000000000..155fe094e --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/TestParameter.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.utils.clipreads; + +/** + * Created by IntelliJ IDEA. + * User: roger + * Date: 11/28/11 + * Time: 4:07 PM + * To change this template use File | Settings | File Templates. + */ +public class TestParameter { + int inputStart; + int inputStop; + int substringStart; + int substringStop; + String cigar; + + public TestParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) { + inputStart = InputStart; + inputStop = InputStop; + substringStart = SubstringStart; + substringStop = SubstringStop; + cigar = Cigar; + } +} diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala index 6947d4398..1d3fb2622 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala @@ -21,9 +21,6 @@ class PacbioProcessingPipeline extends QScript { @Input(doc="input FASTA/FASTQ/BAM file - or list of FASTA/FASTQ/BAM files. ", shortName="i", required=true) var input: File = _ - @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true) - var R: String = _ - @Input(doc="Reference fasta file", shortName="R", required=true) var reference: File = _ @@ -180,7 +177,6 @@ class PacbioProcessingPipeline extends QScript { } case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates { - this.resources = R this.recal_file = inRecalFile this.output_dir = outPath this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates" diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala new file mode 100644 index 000000000..483a0b60e --- /dev/null +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala @@ -0,0 +1,67 @@ +package org.broadinstitute.sting.queue.pipeline + +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.testng.annotations.Test +import org.broadinstitute.sting.BaseTest + +class DataProcessingPipelineTest { + @Test + def testSimpleBAM { + val projectName = "test1" + val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam" + val spec = new PipelineTestSpec + spec.name = "DataProcessingPipeline" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -i " + BaseTest.testDir + "exampleBAM.bam", + " -D " + BaseTest.testDir + "exampleDBSNP.vcf", + " -nv ", + " -p " + projectName).mkString + spec.fileMD5s += testOut -> "69ba216bcf1e2dd9b6bd631ef99efda9" + PipelineTest.executeTest(spec) + } + + @Test + def testBWAPEBAM { + val projectName = "test2" + val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam" + val spec = new PipelineTestSpec + spec.name = "DataProcessingPipeline" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -i " + BaseTest.testDir + "exampleBAM.bam", + " -D " + BaseTest.testDir + "exampleDBSNP.vcf", + " -nv ", + " -bwa /home/unix/carneiro/bin/bwa", + " -bwape ", + " -p " + projectName).mkString + spec.fileMD5s += testOut -> "3134cbeae1561ff8e6b559241f9ed7f5" + PipelineTest.executeTest(spec) + } + +} diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala new file mode 100644 index 000000000..355420a93 --- /dev/null +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala @@ -0,0 +1,45 @@ +package org.broadinstitute.sting.queue.pipeline + +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.testng.annotations.Test +import org.broadinstitute.sting.BaseTest + +class PacbioProcessingPipelineTest { + @Test + def testBAM { + val testOut = "exampleBAM.recal.bam" + val spec = new PipelineTestSpec + spec.name = "pacbioProcessingPipeline" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -i " + BaseTest.testDir + "exampleBAM.bam", + " -blasr ", + " -D " + BaseTest.testDir + "exampleDBSNP.vcf").mkString + spec.fileMD5s += testOut -> "91a88b51d00cec40596d6061aa0c9938" + PipelineTest.executeTest(spec) + } +} diff --git a/public/testdata/exampleBAM.bam b/public/testdata/exampleBAM.bam index a6ebb6fd1..319dd1a72 100644 Binary files a/public/testdata/exampleBAM.bam and b/public/testdata/exampleBAM.bam differ diff --git a/public/testdata/exampleBAM.bam.bai b/public/testdata/exampleBAM.bam.bai index cc6e1a145..052ac614b 100644 Binary files a/public/testdata/exampleBAM.bam.bai and b/public/testdata/exampleBAM.bam.bai differ diff --git a/public/testdata/exampleDBSNP.vcf b/public/testdata/exampleDBSNP.vcf new file mode 100644 index 000000000..9e7e96f51 --- /dev/null +++ b/public/testdata/exampleDBSNP.vcf @@ -0,0 +1,282 @@ +##fileformat=VCFv4.1 +##FILTER= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO=5% minor allele frequency in 1+ populations"> +##INFO=5% minor allele frequency in each and all populations"> +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO=SubSNP->Batch.link_out"> +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##LeftAlignVariants="analysis_type=LeftAlignVariants input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=00-All.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub filter_mismatching_base_and_quals=false" +##contig= +##phasing=partial +##reference=GRCh37.3 +##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta +##source=dbSNP +##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 10144 rs144773400 TA T . PASS ASP;RSPOS=10145;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10228 rs143255646 TA T . PASS ASP;RSPOS=10229;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10234 rs145599635 C T . PASS ASP;RSPOS=10234;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 10248 rs148908337 A T . PASS ASP;RSPOS=10248;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 10254 rs140194106 TA T . PASS ASP;RSPOS=10255;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10291 rs145427775 C T . PASS ASP;RSPOS=10291;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 10327 rs112750067 T C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10327;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132 +chr1 10329 rs150969722 AC A . PASS ASP;RSPOS=10330;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10351 rs145072688 CTA C,CA . PASS ASP;RSPOS=10352;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10382 rs147093981 AAC A,AC . PASS ASP;RSPOS=10383;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10433 rs56289060 A AC . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10433;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 10439 rs112766696 AC A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10440;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=132 +chr1 10439 rs138941843 AC A . PASS ASP;RSPOS=10440;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10440 rs112155239 C A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10440;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132 +chr1 10492 rs55998931 C T . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.0617001828153565;RSPOS=10492;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040000000100;WGT=0;dbSNPBuildID=129 +chr1 10519 rs62636508 G C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10519;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129 +chr1 10583 rs58108140 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.270566727605119;KGPilot123;RSPOS=10583;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=129 +chr1 10611 rs189107123 C G . PASS KGPilot123;RSPOS=10611;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 10828 rs10218492 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10828;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119 +chr1 10904 rs10218493 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10904;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=119 +chr1 10927 rs10218527 A G . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10927;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119 +chr1 10938 rs28853987 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10938;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125 +chr1 11014 rs28484712 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11014;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125 +chr1 11022 rs28775022 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11022;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125 +chr1 11081 rs10218495 G T . PASS CFL;GENEINFO=LOC100652771:100652771;GNO;RSPOS=11081;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=119 +chr1 11863 rs187669455 C A . PASS RSPOS=11863;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135 +chr1 13302 rs180734498 C T . PASS KGPilot123;RSPOS=13302;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 13327 rs144762171 G C . PASS ASP;KGPilot123;RSPOS=13327;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 13684 rs71260404 C T . PASS GENEINFO=LOC100652771:100652771;GNO;RSPOS=13684;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130 +chr1 13980 rs151276478 T C . PASS ASP;KGPilot123;RSPOS=13980;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 14889 rs142444908 G A . PASS ASP;RSPOS=14889;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 14907 rs79585140 A G . PASS GNO;RSPOS=14907;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131 +chr1 14930 rs75454623 A G . PASS GNO;RSPOS=14930;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131 +chr1 14976 rs71252251 G A . PASS ASP;GNO;RSPOS=14976;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130 +chr1 15061 rs71268703 T TG . PASS ASP;GNO;RSPOS=15061;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130 +chr1 15118 rs71252250 A G . PASS ASP;GNO;RSPOS=15118;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130 +chr1 15211 rs144718396 T G . PASS ASP;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 15211 rs78601809 T G . PASS ASP;GNO;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131 +chr1 16257 rs78588380 G C . PASS ASP;GNO;RSPOS=16257;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131 +chr1 16378 rs148220436 T C . PASS ASP;RSPOS=16378;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 16495 rs141130360 G C . PASS ASP;RSPOS=16495;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 16497 rs150723783 A G . PASS ASP;RSPOS=16497;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 17519 rs192890528 G T . PASS RSPOS=17519;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135 +chr1 19226 rs138930629 T A . PASS ASP;RSPOS=19226;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 20141 rs56336884 G A . PASS HD;RSPOS=20141;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000400000100;WGT=0;dbSNPBuildID=129 +chr1 20144 rs143346096 G A . PASS ASP;RSPOS=20144;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 20206 rs71262675 C T . PASS GNO;RSPOS=20206;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130 +chr1 20245 rs71262674 G A . PASS GMAF=0.256398537477148;GNO;RSPOS=20245;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130 +chr1 20304 rs71262673 G C . PASS GMAF=0.338208409506399;GNO;RSPOS=20304;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130 +chr1 26999 rs147506580 A G . PASS ASP;RSPOS=26999;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 29436 rs2462493 G A . PASS GNO;RSPOS=29436;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=100 +chr1 30923 rs140337953 G T . PASS ASP;KGPilot123;RSPOS=30923;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 33487 rs77459554 C T . PASS ASP;GNO;RSPOS=33487;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131 +chr1 33495 rs75468675 C T . PASS ASP;GNO;RSPOS=33495;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131 +chr1 33505 rs75627161 T C . PASS ASP;GNO;RSPOS=33505;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131 +chr1 33508 rs75609629 A T . PASS ASP;GNO;RSPOS=33508;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131 +chr1 33521 rs76098219 T A . PASS GNO;RSPOS=33521;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131 +chr1 33593 rs557585 G A . PASS RSPOS=33593;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=83 +chr1 33648 rs62028204 G T . PASS RSPOS=33648;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129 +chr1 33656 rs113821789 T C . PASS RSPOS=33656;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=132 +chr1 51476 rs187298206 T C . PASS KGPilot123;RSPOS=51476;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 51479 rs116400033 T A . PASS ASP;G5;G5A;GMAF=0.113802559414991;KGPilot123;RSPOS=51479;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070010000100;WGT=0;dbSNPBuildID=132 +chr1 51803 rs62637812 T C . PASS GMAF=0.468921389396709;RSPOS=51803;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040000000100;WGT=0;dbSNPBuildID=129 +chr1 51898 rs76402894 C A . PASS GMAF=0.0731261425959781;GNO;RSPOS=51898;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131 +chr1 51914 rs190452223 T G . PASS KGPilot123;RSPOS=51914;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 51928 rs78732933 G A . PASS GNO;RSPOS=51928;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131 +chr1 51935 rs181754315 C T . PASS KGPilot123;RSPOS=51935;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 51954 rs185832753 G C . PASS KGPilot123;RSPOS=51954;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 52058 rs62637813 G C . PASS GMAF=0.0342778793418647;KGPilot123;RSPOS=52058;SAO=0;SSR=1;VC=SNV;VLD;VP=050000000000040010000140;WGT=0;dbSNPBuildID=129 +chr1 52144 rs190291950 T A . PASS KGPilot123;RSPOS=52144;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 52238 rs150021059 T G . PASS ASP;KGPilot123;RSPOS=52238;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 54353 rs140052487 C A . PASS ASP;KGPilot123;RSPOS=54353;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 54421 rs146477069 A G . PASS ASP;KGPilot123;RSPOS=54421;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 54490 rs141149254 G A . PASS ASP;KGPilot123;RSPOS=54490;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 54676 rs2462492 C T . PASS ASP;GMAF=0.191956124314442;GNO;HD;KGPilot123;RSPOS=54676;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040510000100;WGT=0;dbSNPBuildID=100 +chr1 54753 rs143174675 T G . PASS ASP;KGPilot123;RSPOS=54753;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 54788 rs59861892 CC C,CCT . PASS ASP;RSPOS=54789;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 54795 rs58014817 T A . PASS ASP;RSPOS=54795;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129 +chr1 55164 rs3091274 C A . PASS G5;G5A;GMAF=0.145338208409506;GNO;KGPilot123;RSPOS=55164;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030110000100;WGT=0;dbSNPBuildID=103 +chr1 55299 rs10399749 C T . PASS G5;G5A;GMAF=0.278793418647166;GNO;KGPilot123;PH2;RSPOS=55299;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030112000100;WGT=0;dbSNPBuildID=119 +chr1 55302 rs3091273 C T . PASS RSPOS=55302;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103 +chr1 55313 rs182462964 A T . PASS KGPilot123;RSPOS=55313;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55322 rs3107974 C T . PASS RSPOS=55322;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103 +chr1 55326 rs3107975 T C . PASS GNO;HD;KGPilot123;RSPOS=55326;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=103 +chr1 55330 rs185215913 G A . PASS KGPilot123;RSPOS=55330;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55367 rs190850374 G A . PASS KGPilot123;RSPOS=55367;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55388 rs182711216 C T . PASS KGPilot123;RSPOS=55388;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55394 rs2949420 T A . PASS GNO;KGPilot123;PH2;RSPOS=55394;SAO=0;SSR=0;VC=SNV;VP=050000000000000112000100;WGT=0;dbSNPBuildID=101 +chr1 55416 rs193242050 G A . PASS KGPilot123;RSPOS=55416;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55427 rs183189405 T C . PASS KGPilot123;RSPOS=55427;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55545 rs28396308 C T . PASS GNO;RSPOS=55545;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=125 +chr1 55816 rs187434873 G A . PASS KGPilot123;RSPOS=55816;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55850 rs191890754 C G . PASS KGPilot123;RSPOS=55850;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55852 rs184233019 G C . PASS KGPilot123;RSPOS=55852;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 56644 rs143342222 A C . PASS ASP;KGPilot123;RSPOS=56644;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 57952 rs189727433 A C . PASS KGPilot123;RSPOS=57952;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 58771 rs140128481 T C . PASS ASP;RSPOS=58771;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 58814 rs114420996 G A . PASS ASP;G5;GMAF=0.0982632541133455;KGPilot123;RSPOS=58814;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132 +chr1 59040 rs149755937 T C . PASS ASP;KGPilot123;RSPOS=59040;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 60718 rs78395614 G A . PASS CFL;GNO;RSPOS=60718;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131 +chr1 60726 rs192328835 C A . PASS KGPilot123;RSPOS=60726;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 60791 rs76199781 A G . PASS CFL;GNO;RSPOS=60791;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131 +chr1 61442 rs74970982 A G . PASS CFL;GMAF=0.076782449725777;GNO;KGPilot123;RSPOS=61442;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131 +chr1 61462 rs56992750 T A . PASS CFL;G5;G5A;GMAF=0.0383912248628885;GNO;KGPilot123;RSPOS=61462;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008030110000100;WGT=0;dbSNPBuildID=129 +chr1 61480 rs75526266 G C . PASS CFL;GNO;RSPOS=61480;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131 +chr1 61499 rs75719746 G A . PASS CFL;GNO;RSPOS=61499;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131 +chr1 61743 rs184286948 G C . PASS KGPilot123;RSPOS=61743;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 61920 rs62637820 G A . PASS CFL;GMAF=0.0255941499085923;RSPOS=61920;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=129 +chr1 61987 rs76735897 A G . PASS CFL;GMAF=0.292961608775137;GNO;KGPilot123;RSPOS=61987;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131 +chr1 61989 rs77573425 G C . PASS CFL;GMAF=0.309414990859232;GNO;KGPilot123;RSPOS=61989;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131 +chr1 61993 rs190553843 C T . PASS KGPilot123;RSPOS=61993;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 62156 rs181864839 C T . PASS KGPilot123;RSPOS=62156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 62157 rs10399597 G A . PASS CFL;GMAF=0.00228519195612431;KGPilot123;RSPOS=62157;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=119 +chr1 62162 rs140556834 G A . PASS ASP;KGPilot123;RSPOS=62162;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 62203 rs28402963 T C . PASS CFL;KGPilot123;RSPOS=62203;SAO=0;SSR=0;VC=SNV;VP=050000000008000010000100;WGT=0;dbSNPBuildID=125 +chr1 62271 rs28599927 A G . PASS CFL;GMAF=0.138482632541133;RSPOS=62271;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=125 +chr1 63268 rs75478250 T C . PASS CFL;GNO;RSPOS=63268;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131 +chr1 63276 rs185977555 G A . PASS KGPilot123;RSPOS=63276;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 63297 rs188886746 G A . PASS KGPilot123;RSPOS=63297;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 63671 rs116440577 G A . PASS ASP;G5;GMAF=0.170018281535649;KGPilot123;RSPOS=63671;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132 +chr1 63737 rs77426996 TACT T,TCTA . PASS CFL;RSPOS=63738;SAO=0;SSR=0;VC=DIV;VP=050000000008000000000200;WGT=0;dbSNPBuildID=131 +chr1 64649 rs181431124 A C . PASS KGPilot123;RSPOS=64649;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 66008 rs2691286 C G . PASS CFL;GNO;RSPOS=66008;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=100 +chr1 66162 rs62639105 A T . PASS CFL;GMAF=0.320383912248629;GNO;KGPilot123;RSPOS=66162;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000110000100;WGT=0;dbSNPBuildID=129 +chr1 66176 rs28552463 T A . PASS CFL;GMAF=0.0484460694698355;KGPilot123;RSPOS=66176;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=125 +chr1 66219 rs181028663 A T . PASS KGPilot123;RSPOS=66219;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 66238 rs113961546 T A . PASS CFL;GNO;RSPOS=66238;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=132 +chr1 66314 rs28534012 T A . PASS CFL;RSPOS=66314;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125 +chr1 66331 rs186063952 A C . PASS KGPilot123;RSPOS=66331;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 66334 rs28464214 T A . PASS CFL;RSPOS=66334;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125 +chr1 66442 rs192044252 T A . PASS KGPilot123;RSPOS=66442;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 66457 rs13328655 T A . PASS CFL;GMAF=0.0795246800731261;KGPilot123;RSPOS=66457;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=121 +chr1 66503 rs112350669 T A . PASS CFL;RSPOS=66503;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=132 +chr1 66507 rs12401368 T A . PASS CFL;GMAF=0.479890310786106;KGPilot123;RSPOS=66507;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=120 +chr1 66651 rs2257270 A T . PASS CFL;GNO;RSPOS=66651;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=100 +chr1 67179 rs149952626 C G . PASS ASP;KGPilot123;RSPOS=67179;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 67181 rs77662731 A G . PASS ASP;G5;G5A;GENEINFO=OR4F5:79501;GMAF=0.0470749542961609;GNO;KGPilot123;RSPOS=67181;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070110000100;WGT=0;dbSNPBuildID=131 +chr1 67223 rs78676975 C A . PASS ASP;GENEINFO=OR4F5:79501;GNO;RSPOS=67223;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131 +chr1 69428 rs140739101 T G . PASS ASP;RSPOS=69428;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134 +chr1 69453 rs142004627 G A . PASS ASP;RSPOS=69453;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 69476 rs148502021 T C . PASS ASP;RSPOS=69476;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134 +chr1 69496 rs150690004 G A . PASS ASP;RSPOS=69496;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134 +chr1 69511 rs75062661 A G . PASS GENEINFO=OR4F5:79501;GMAF=0.193784277879342;GNO;KGPilot123;RSPOS=69511;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000110000100;WGT=0;dbSNPBuildID=131 +chr1 69534 rs190717287 T C . PASS KGPilot123;RSPOS=69534;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 69552 rs55874132 G C . PASS GENEINFO=OR4F5:79501;HD;RSPOS=69552;S3D;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050300000000040400000100;WGT=0;dbSNPBuildID=129 +chr1 69590 rs141776804 T A . PASS ASP;RSPOS=69590;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 69594 rs144967600 T C . PASS ASP;RSPOS=69594;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 72148 rs182862337 C T . PASS KGPilot123;RSPOS=72148;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 73841 rs143773730 C T . PASS ASP;KGPilot123;RSPOS=73841;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 74651 rs62641291 G A . PASS RSPOS=74651;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129 +chr1 74681 rs13328683 G T . PASS CFL;GMAF=0.286106032906764;RSPOS=74681;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=121 +chr1 74709 rs62641292 T A . PASS CFL;RSPOS=74709;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=129 +chr1 74771 rs13328675 A G . PASS CFL;RSPOS=74771;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121 +chr1 74790 rs13328700 C G . PASS CFL;RSPOS=74790;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121 +chr1 74792 rs13328684 G A . PASS CFL;RSPOS=74792;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121 +chr1 77462 rs188023513 G A . PASS KGPilot123;RSPOS=77462;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 77470 rs192898053 T C . PASS KGPilot123;RSPOS=77470;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 77874 rs184538873 G A . PASS KGPilot123;RSPOS=77874;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 77961 rs78385339 G A . PASS GMAF=0.125685557586837;KGPilot123;RSPOS=77961;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040010000100;WGT=0;dbSNPBuildID=131 +chr1 79033 rs62641298 A G . PASS GMAF=0.438299817184644;GNO;HD;KGPilot123;RSPOS=79033;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=129 +chr1 79050 rs62641299 G T . PASS GMAF=0.224405850091408;GNO;KGPilot123;RSPOS=79050;SAO=0;SSR=0;VC=SNV;VP=050000000000000110000100;WGT=0;dbSNPBuildID=129 +chr1 79137 rs143777184 A T . PASS ASP;KGPilot123;RSPOS=79137;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 79417 rs184768190 C T . PASS KGPilot123;RSPOS=79417;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 79418 rs2691296 G C . PASS GMAF=0.0178244972577697;RSPOS=79418;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=100 +chr1 79538 rs2691295 C T . PASS RSPOS=79538;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=100 +chr1 79772 rs147215883 C G . PASS ASP;KGPilot123;RSPOS=79772;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 79872 rs189224661 T G . PASS KGPilot123;RSPOS=79872;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 80323 rs3942603 G C . PASS CFL;GNO;RSPOS=80323;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=108 +chr1 80386 rs3878915 C A . PASS GMAF=0.0118829981718464;RSPOS=80386;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=108 +chr1 80454 rs144226842 G C . PASS ASP;KGPilot123;RSPOS=80454;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 81836 rs2259560 A T . PASS ASP;GNO;RSPOS=81836;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=100 +chr1 81949 rs181567186 T C . PASS KGPilot123;RSPOS=81949;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 81962 rs4030308 T TAA . PASS ASP;RSPOS=81962;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108 +chr1 82102 rs4030307 C T . PASS ASP;RSPOS=82102;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108 +chr1 82103 rs2020400 T C . PASS ASP;RSPOS=82103;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92 +chr1 82126 rs1815133 C T . PASS ASP;RSPOS=82126;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92 +chr1 82133 rs4030306 CA C,CAAAAAAAAAAAAAAA . PASS ASP;RSPOS=82136;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108 +chr1 82154 rs4477212 A G . PASS ASP;HD;RSPOS=82154;SAO=0;SSR=0;VC=SNV;VP=050000000004000400000100;WGT=0;dbSNPBuildID=111 +chr1 82162 rs1815132 C A . PASS ASP;GMAF=0.0351919561243144;GNO;RSPOS=82162;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=92 +chr1 82163 rs139113303 G A . PASS ASP;KGPilot123;RSPOS=82163;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 82196 rs112844054 A T . PASS ASP;RSPOS=82196;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132 +chr1 82249 rs1851945 A G . PASS ASP;GMAF=0.0452468007312614;KGPilot123;RSPOS=82249;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=92 +chr1 82282 rs3871775 G A . PASS ASP;RSPOS=82282;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108 +chr1 82303 rs3871776 T C . PASS ASP;RSPOS=82303;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108 +chr1 82316 rs4030305 A C . PASS ASP;GNO;RSPOS=82316;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=108 +chr1 82609 rs149189449 C G . PASS ASP;KGPilot123;RSPOS=82609;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 82676 rs185237834 T G . PASS KGPilot123;RSPOS=82676;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 82734 rs4030331 T C . PASS ASP;GMAF=0.261882998171846;KGPilot123;RSPOS=82734;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=108 +chr1 82957 rs189774606 C T . PASS KGPilot123;RSPOS=82957;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 83084 rs181193408 T A . PASS KGPilot123;RSPOS=83084;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 83088 rs186081601 G C . PASS KGPilot123;RSPOS=83088;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 83107 rs4405097 G C . PASS ASP;RSPOS=83107;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=111 +chr1 83119 rs4030324 AA A,ATAAC . PASS ASP;RSPOS=83120;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108 +chr1 83771 rs189906733 T G . PASS KGPilot123;RSPOS=83771;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 83786 rs58520670 T TA . PASS ASP;RSPOS=83794;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83815 rs58857344 GAGAA G . PASS ASP;RSPOS=83827;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83826 rs71281475 AAAGA A,AAA . PASS ASP;GNO;RSPOS=83827;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130 +chr1 83855 rs59596480 GAA G . PASS ASP;RSPOS=83857;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83872 rs59556914 AA A,AAGA . PASS ASP;RSPOS=83873;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83884 rs59586754 GAAA G . PASS ASP;RSPOS=83885;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83897 rs61330047 GAA G . PASS ASP;RSPOS=83899;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83901 rs58254183 GAAAGAA G . PASS ASP;RSPOS=83903;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83921 rs61338823 GAA G . PASS ASP;RSPOS=83923;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83930 rs71281474 AG A,AGA . PASS ASP;GNO;RSPOS=83931;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130 +chr1 83934 rs59235392 AG A,AGAAA . PASS ASP;RSPOS=83935;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83977 rs180759811 A G . PASS KGPilot123;RSPOS=83977;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84002 rs28850140 G A . PASS ASP;GMAF=0.138939670932358;KGPilot123;RSPOS=84002;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=125 +chr1 84010 rs186443818 G A . PASS KGPilot123;RSPOS=84010;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84018 rs61352176 GAA G . PASS ASP;RSPOS=84020;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 84079 rs190867312 T C . PASS KGPilot123;RSPOS=84079;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84139 rs183605470 A T . PASS KGPilot123;RSPOS=84139;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84156 rs188652299 A C . PASS KGPilot123;RSPOS=84156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84244 rs191297051 A C . PASS KGPilot123;RSPOS=84244;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84295 rs183209871 G A . PASS KGPilot123;RSPOS=84295;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84346 rs187855973 T C . PASS KGPilot123;RSPOS=84346;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84453 rs191379015 C G . PASS KGPilot123;RSPOS=84453;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84705 rs183470350 T G . PASS KGPilot123;RSPOS=84705;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 diff --git a/public/testdata/exampleFASTA.fasta.amb b/public/testdata/exampleFASTA.fasta.amb new file mode 100644 index 000000000..986e6d603 --- /dev/null +++ b/public/testdata/exampleFASTA.fasta.amb @@ -0,0 +1 @@ +100000 1 0 diff --git a/public/testdata/exampleFASTA.fasta.ann b/public/testdata/exampleFASTA.fasta.ann new file mode 100644 index 000000000..642ddb6d7 --- /dev/null +++ b/public/testdata/exampleFASTA.fasta.ann @@ -0,0 +1,3 @@ +100000 1 11 +0 chr1 (null) +0 100000 0 diff --git a/public/testdata/exampleFASTA.fasta.bwt b/public/testdata/exampleFASTA.fasta.bwt new file mode 100644 index 000000000..fe7422280 Binary files /dev/null and b/public/testdata/exampleFASTA.fasta.bwt differ diff --git a/public/testdata/exampleFASTA.fasta.pac b/public/testdata/exampleFASTA.fasta.pac new file mode 100644 index 000000000..b0f55c0c4 Binary files /dev/null and b/public/testdata/exampleFASTA.fasta.pac differ diff --git a/public/testdata/exampleFASTA.fasta.rbwt b/public/testdata/exampleFASTA.fasta.rbwt new file mode 100644 index 000000000..f623b8c39 Binary files /dev/null and b/public/testdata/exampleFASTA.fasta.rbwt differ diff --git a/public/testdata/exampleFASTA.fasta.rpac b/public/testdata/exampleFASTA.fasta.rpac new file mode 100644 index 000000000..b88ff49eb Binary files /dev/null and b/public/testdata/exampleFASTA.fasta.rpac differ diff --git a/public/testdata/exampleFASTA.fasta.rsa b/public/testdata/exampleFASTA.fasta.rsa new file mode 100644 index 000000000..6e7e213df Binary files /dev/null and b/public/testdata/exampleFASTA.fasta.rsa differ diff --git a/public/testdata/exampleFASTA.fasta.sa b/public/testdata/exampleFASTA.fasta.sa new file mode 100644 index 000000000..d6db971b7 Binary files /dev/null and b/public/testdata/exampleFASTA.fasta.sa differ