Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Laurent Francioli 2011-12-12 12:19:44 +01:00
commit 025bdfe2cc
26 changed files with 1658 additions and 551 deletions

View File

@ -38,7 +38,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
for ( final Genotype genotype : genotypes ) {
// we care only about variant calls with likelihoods
if ( genotype.isHomRef() )
if ( !genotype.isHet() && !genotype.isHomVar() )
continue;
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());

View File

@ -52,7 +52,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
protected enum GenotypeType { AA, AB, BB }
protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE;
protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
protected AlleleFrequencyCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
this.N = N;
@ -62,24 +62,14 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
/**
* Must be overridden by concrete subclasses
* @param GLs genotype likelihoods
* @param Alleles Alleles corresponding to GLs
* @param log10AlleleFrequencyPriors priors
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
* @param GLs genotype likelihoods
* @param Alleles Alleles corresponding to GLs
* @param log10AlleleFrequencyPriors priors
* @param log10AlleleFrequencyLikelihoods array (pre-allocated) to store likelihoods results
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store posteriors results
*/
protected abstract void getLog10PNonRef(GenotypesContext GLs, List<Allele> Alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors);
/**
* Can be overridden by concrete subclasses
* @param vc variant context with genotype likelihoods
* @param log10AlleleFrequencyPosteriors allele frequency results
* @param AFofMaxLikelihood allele frequency of max likelihood
*
* @return calls
*/
protected abstract GenotypesContext assignGenotypes(VariantContext vc,
double[] log10AlleleFrequencyPosteriors,
int AFofMaxLikelihood);
double[][] log10AlleleFrequencyPriors,
double[][] log10AlleleFrequencyLikelihoods,
double[][] log10AlleleFrequencyPosteriors);
}

View File

@ -27,69 +27,32 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream;
import java.util.*;
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
//
// code for testing purposes
//
private final static boolean DEBUG = false;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final boolean SIMPLE_GREEDY_GENOTYPER = false;
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
private final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter);
}
public void getLog10PNonRef(GenotypesContext GLs, List<Allele> alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
public void getLog10PNonRef(final GenotypesContext GLs,
final List<Allele> alleles,
final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods,
final double[][] log10AlleleFrequencyPosteriors) {
final int numAlleles = alleles.size();
final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null;
final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null;
int idxDiag = numAlleles;
int incr = numAlleles - 1;
for (int k=1; k < numAlleles; k++) {
// multi-allelic approximation, part 1: Ideally
// for each alt allele compute marginal (suboptimal) posteriors -
// compute indices for AA,AB,BB for current allele - genotype likelihoods are a linear vector that can be thought of
// as a row-wise upper triangular matrix of likelihoods.
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
final int idxAA = 0;
final int idxAB = k;
// yy is always element on the diagonal.
// 2 alleles: BBelement 2
// 3 alleles: BB element 3. CC element 5
// 4 alleles:
final int idxBB = idxDiag;
idxDiag += incr--;
final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
if (numAlleles > 2) {
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
}
}
if (numAlleles > 2) {
// multiallelic approximation, part 2:
// report posteriors for allele that has highest estimated AC
int mostLikelyAlleleIdx = MathUtils.maxElementIndex(bestAFguess);
for (int k=0; k < log10AlleleFrequencyPosteriors.length-1; k++)
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
}
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
}
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
@ -100,7 +63,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( sample.hasLikelihoods() ) {
double[] gls = sample.getLikelihoods().getAsVector();
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
genotypeLikelihoods.add(gls);
}
}
@ -109,9 +72,395 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
final static double approximateLog10SumLog10(double[] vals) {
if ( vals.length < 2 )
throw new ReviewedStingException("Passing array with fewer than 2 values when computing approximateLog10SumLog10");
double approx = approximateLog10SumLog10(vals[0], vals[1]);
for ( int i = 2; i < vals.length; i++ )
approx = approximateLog10SumLog10(approx, vals[i]);
return approx;
}
final static double approximateLog10SumLog10(double small, double big) {
// make sure small is really the smaller value
if ( small > big ) {
final double t = big;
big = small;
small = t;
}
if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
return big;
if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE)
return big;
// OK, so |y-x| < tol: we use the following identity then:
// we need to compute log10(10^x + 10^y)
// By Jacobian logarithm identity, this is equal to
// max(x,y) + log10(1+10^-abs(x-y))
// we compute the second term as a table lookup
// with integer quantization
// we have pre-stored correction for 0,0.1,0.2,... 10.0
//final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding
//double z =Math.log10(1+Math.pow(10.0,-diff));
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
return big + MathUtils.jacobianLogTable[ind];
}
// -------------------------------------------------------------------------------------
//
// Linearized, ~O(N), implementation.
// Multi-allelic implementation.
//
// -------------------------------------------------------------------------------------
private static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
// a wrapper around the int array so that we can make it hashable
private static final class ExactACcounts {
private final int[] counts;
private int hashcode = -1;
public ExactACcounts(final int[] counts) {
this.counts = counts;
}
public int[] getCounts() {
return counts;
}
@Override
public boolean equals(Object obj) {
return (obj instanceof ExactACcounts) ? Arrays.equals(counts, ((ExactACcounts)obj).counts) : false;
}
@Override
public int hashCode() {
if ( hashcode == -1 )
hashcode = Arrays.hashCode(counts);
return hashcode;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append(counts[0]);
for ( int i = 1; i < counts.length; i++ ) {
sb.append("/");
sb.append(counts[i]);
}
return sb.toString();
}
}
// This class represents a column in the Exact AC calculation matrix
private static final class ExactACset {
// the counts of the various alternate alleles which this column represents
final ExactACcounts ACcounts;
// the column of the matrix
final double[] log10Likelihoods;
// mapping of column index for those columns upon which this one depends to the index into the PLs which is used as the transition to this column;
// for example, in the biallelic case, the transition from k=0 to k=1 would be AB while the transition to k=2 would be BB.
final HashMap<ExactACcounts, Integer> ACsetIndexToPLIndex = new HashMap<ExactACcounts, Integer>();
// to minimize memory consumption, we know we can delete any sets in this list because no further sets will depend on them
final ArrayList<ExactACcounts> dependentACsetsToDelete = new ArrayList<ExactACcounts>();
public ExactACset(final int size, final ExactACcounts ACcounts) {
this.ACcounts = ACcounts;
log10Likelihoods = new double[size];
}
// sum of all the non-reference alleles
public int getACsum() {
int sum = 0;
for ( int count : ACcounts.getCounts() )
sum += count;
return sum;
}
public boolean equals(Object obj) {
return (obj instanceof ExactACset) ? ACcounts.equals(((ExactACset)obj).ACcounts) : false;
}
}
public static void linearExactMultiAllelic(final GenotypesContext GLs,
final int numAlternateAlleles,
final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods,
final double[][] log10AlleleFrequencyPosteriors,
final boolean preserveData) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
final int numSamples = genotypeLikelihoods.size()-1;
final int numChr = 2*numSamples;
// queue of AC conformations to process
final Queue<ExactACset> ACqueue = new LinkedList<ExactACset>();
// mapping of ExactACset indexes to the objects
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1);
// add AC=0 to the queue
int[] zeroCounts = new int[numAlternateAlleles];
ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts));
ACqueue.add(zeroSet);
indexesToACset.put(zeroSet.ACcounts, zeroSet);
// keep processing while we have AC conformations that need to be calculated
double maxLog10L = Double.NEGATIVE_INFINITY;
while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods
final ExactACset set = ACqueue.remove();
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
// adjust max likelihood seen if needed
maxLog10L = Math.max(maxLog10L, log10LofKs);
}
}
private static double calculateAlleleCountConformation(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods,
final double maxLog10L,
final int numChr,
final boolean preserveData,
final Queue<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods,
final double[][] log10AlleleFrequencyPosteriors) {
if ( DEBUG )
System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
// compute the log10Likelihoods
computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
// clean up memory
if ( !preserveData ) {
for ( ExactACcounts index : set.dependentACsetsToDelete ) {
indexesToACset.put(index, null);
if ( DEBUG )
System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
}
}
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
// can we abort early because the log10Likelihoods are so small?
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
if ( DEBUG )
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
// no reason to keep this data around because nothing depends on it
if ( !preserveData )
indexesToACset.put(set.ACcounts, null);
return log10LofK;
}
// iterate over higher frequencies if possible
final int ACwiggle = numChr - set.getACsum();
if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies
return log10LofK;
ExactACset lastSet = null; // keep track of the last set placed in the queue so that we can tell it to clean us up when done processing
final int numAltAlleles = set.ACcounts.getCounts().length;
// genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
// so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD.
// add conformations for the k+1 case
int PLindex = 0;
for ( int allele = 0; allele < numAltAlleles; allele++ ) {
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
ACcountsClone[allele]++;
lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset);
}
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
if ( ACwiggle > 1 ) {
for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
ACcountsClone[allele_i]++;
ACcountsClone[allele_j]++;
lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex , ACqueue, indexesToACset);
}
}
}
// if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate
// over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early)
if ( !preserveData && lastSet == null ) {
if ( DEBUG )
System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
}
if ( lastSet != null )
lastSet.dependentACsetsToDelete.add(set.ACcounts);
return log10LofK;
}
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
// also adds it as a dependency to the given callingSetIndex.
// returns the ExactACset if that set was not already in the queue and null otherwise.
private static ExactACset updateACset(final int[] ACcounts,
final int numChr,
final ExactACset callingSet,
final int PLsetIndex,
final Queue<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
final ExactACcounts index = new ExactACcounts(ACcounts);
boolean wasInQueue = true;
if ( !indexesToACset.containsKey(index) ) {
ExactACset set = new ExactACset(numChr/2 +1, index);
indexesToACset.put(index, set);
ACqueue.add(set);
wasInQueue = false;
}
// add the given dependency to the set
final ExactACset set = indexesToACset.get(index);
set.ACsetIndexToPLIndex.put(callingSet.ACcounts, PLsetIndex);
return wasInQueue ? null : set;
}
private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final Queue<ExactACset> ACqueue) {
ExactACset set = null;
for ( ExactACset queued : ACqueue ) {
if ( queued.dependentACsetsToDelete.contains(callingSetIndex) )
set = queued;
}
return set;
}
private static void computeLofK(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods,
final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[][] log10AlleleFrequencyPriors,
final double[][] log10AlleleFrequencyLikelihoods,
final double[][] log10AlleleFrequencyPosteriors) {
set.log10Likelihoods[0] = 0.0; // the zero case
final int totalK = set.getACsum();
// special case for k = 0 over all k
if ( totalK == 0 ) {
for ( int j = 1; j < set.log10Likelihoods.length; j++ )
set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX];
}
// k > 0 for at least one k
else {
// all possible likelihoods for a given cell from which to choose the max
final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
final double[] log10ConformationLikelihoods = new double[numPaths];
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
final double[] gl = genotypeLikelihoods.get(j);
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
// initialize
for ( int i = 0; i < numPaths; i++ )
log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY;
// deal with the AA case first
if ( totalK < 2*j-1 )
log10ConformationLikelihoods[0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
// deal with the other possible conformations now
if ( totalK <= 2*j ) { // skip impossible conformations
int conformationIndex = 1;
for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
if ( DEBUG )
System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
log10ConformationLikelihoods[conformationIndex++] =
determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()];
}
}
final double log10Max = approximateLog10SumLog10(log10ConformationLikelihoods);
// finally, update the L(j,k) value
set.log10Likelihoods[j] = log10Max - logDenominator;
}
}
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
// determine the power of theta to use
int nonRefAlleles = 0;
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
if ( set.ACcounts.getCounts()[i] > 0 )
nonRefAlleles++;
}
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
int AC = set.ACcounts.getCounts()[i];
log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
// for k=0 we still want to use theta
final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
}
}
private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
// the closed form representation generalized for multiple alleles is as follows:
// AA: (2j - totalK) * (2j - totalK - 1)
// AB: 2k_b * (2j - totalK)
// AC: 2k_c * (2j - totalK)
// BB: k_b * (k_b - 1)
// BC: 2 * k_b * k_c
// CC: k_c * (k_c - 1)
final int numAltAlleles = ACcounts.length;
// the AX het case
if ( PLindex <= numAltAlleles )
return MathUtils.log10Cache[2*ACcounts[PLindex-1]] + MathUtils.log10Cache[2*j-totalK];
int subtractor = numAltAlleles+1;
int subtractions = 0;
do {
PLindex -= subtractor;
subtractor--;
subtractions++;
}
while ( PLindex >= subtractor );
final int k_i = ACcounts[subtractions-1];
// the hom var case (e.g. BB, CC, DD)
final double coeff;
if ( PLindex == 0 ) {
coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1];
}
// the het non-ref case (e.g. BC, BD, CD)
else {
final int k_j = ACcounts[subtractions+PLindex-1];
coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j];
}
return coeff;
}
// -------------------------------------------------------------------------------------
//
// Deprecated bi-allelic ~O(N) implementation. Kept here for posterity.
//
// -------------------------------------------------------------------------------------
@ -119,6 +468,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
* A simple data structure that holds the current, prev, and prev->prev likelihoods vectors
* for the exact model calculation
*/
/*
private final static class ExactACCache {
double[] kMinus2, kMinus1, kMinus0;
@ -154,7 +504,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
public int linearExact(GenotypesContext GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
double[][] log10AlleleFrequencyLikelihoods,
double[][] log10AlleleFrequencyPosteriors) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
final int numSamples = genotypeLikelihoods.size()-1;
final int numChr = 2*numSamples;
@ -171,7 +522,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( k == 0 ) { // special case for k = 0
for ( int j=1; j <= numSamples; j++ ) {
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[idxAA];
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0];
}
} else { // k > 0
final double[] kMinus1 = logY.getkMinus1();
@ -184,14 +535,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
double aa = Double.NEGATIVE_INFINITY;
double ab = Double.NEGATIVE_INFINITY;
if (k < 2*j-1)
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[idxAA];
aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0];
if (k < 2*j)
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[idxAB];
ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1];
double log10Max;
if (k > 1) {
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[idxBB];
final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2];
log10Max = approximateLog10SumLog10(aa, ab, bb);
} else {
// we know we aren't considering the BB case, so we can use an optimized log10 function
@ -205,7 +556,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// update the posteriors vector
final double log10LofK = kMinus0[numSamples];
log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
log10AlleleFrequencyLikelihoods[0][k] = log10LofK;
log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k];
// can we abort early?
lastK = k;
@ -222,202 +574,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
final static double approximateLog10SumLog10(double a, double b, double c) {
//return softMax(new double[]{a, b, c});
return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c);
}
*/
final static double approximateLog10SumLog10(double small, double big) {
// make sure small is really the smaller value
if ( small > big ) {
final double t = big;
big = small;
small = t;
}
if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
return big;
if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE)
return big;
// OK, so |y-x| < tol: we use the following identity then:
// we need to compute log10(10^x + 10^y)
// By Jacobian logarithm identity, this is equal to
// max(x,y) + log10(1+10^-abs(x-y))
// we compute the second term as a table lookup
// with integer quantization
// we have pre-stored correction for 0,0.1,0.2,... 10.0
//final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding
//double z =Math.log10(1+Math.pow(10.0,-diff));
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
return big + MathUtils.jacobianLogTable[ind];
}
/**
* Can be overridden by concrete subclasses
* @param vc variant context with genotype likelihoods
* @param log10AlleleFrequencyPosteriors allele frequency results
* @param AFofMaxLikelihood allele frequency of max likelihood
*
* @return calls
*/
public GenotypesContext assignGenotypes(VariantContext vc,
double[] log10AlleleFrequencyPosteriors,
int AFofMaxLikelihood) {
if ( !vc.isVariant() )
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
GenotypesContext GLs = vc.getGenotypes();
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
int[][] tracebackArray = new int[GLs.size()+1][AFofMaxLikelihood+1];
ArrayList<String> sampleIndices = new ArrayList<String>();
int sampleIdx = 0;
// todo - optimize initialization
for (int k=0; k <= AFofMaxLikelihood; k++)
for (int j=0; j <= GLs.size(); j++)
pathMetricArray[j][k] = -1e30;
pathMetricArray[0][0] = 0.0;
// todo = can't deal with optimal dynamic programming solution with multiallelic records
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
sampleIndices.addAll(GLs.getSampleNamesOrderedByName());
sampleIdx = GLs.size();
}
else {
for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) {
if ( !genotype.hasLikelihoods() )
continue;
double[] likelihoods = genotype.getLikelihoods().getAsVector();
if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) {
//System.out.print(sample.getKey()+":");
//for (int k=0; k < likelihoods.length; k++)
// System.out.format("%4.2f ",likelihoods[k]);
//System.out.println();
// all likelihoods are essentially the same: skip this sample and will later on force no call.
//sampleIdx++;
continue;
}
sampleIndices.add(genotype.getSampleName());
for (int k=0; k <= AFofMaxLikelihood; k++) {
double bestMetric = pathMetricArray[sampleIdx][k] + likelihoods[0];
int bestIndex = k;
if (k>0) {
double m2 = pathMetricArray[sampleIdx][k-1] + likelihoods[1];
if (m2 > bestMetric) {
bestMetric = m2;
bestIndex = k-1;
}
}
if (k>1) {
double m2 = pathMetricArray[sampleIdx][k-2] + likelihoods[2];
if (m2 > bestMetric) {
bestMetric = m2;
bestIndex = k-2;
}
}
pathMetricArray[sampleIdx+1][k] = bestMetric;
tracebackArray[sampleIdx+1][k] = bestIndex;
}
sampleIdx++;
}
}
GenotypesContext calls = GenotypesContext.create();
int startIdx = AFofMaxLikelihood;
for (int k = sampleIdx; k > 0; k--) {
int bestGTguess;
String sample = sampleIndices.get(k-1);
Genotype g = GLs.get(sample);
if ( !g.hasLikelihoods() )
continue;
// if all likelihoods are essentially the same: we want to force no-call. In this case, we skip this sample for now,
// and will add no-call genotype to GL's in a second pass
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
double[] likelihoods = g.getLikelihoods().getAsVector();
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
bestGTguess = Utils.findIndexOfMaxEntry(likelihoods);
}
else {
int newIdx = tracebackArray[k][startIdx];;
bestGTguess = startIdx - newIdx;
startIdx = newIdx;
}
// likelihoods are stored row-wise in lower triangular matrix. IE
// for 2 alleles they have ordering AA,AB,BB
// for 3 alleles they are ordered AA,AB,BB,AC,BC,CC
// Get now alleles corresponding to best index
int kk=0;
boolean done = false;
for (int j=0; j < vc.getNAlleles(); j++) {
for (int i=0; i <= j; i++){
if (kk++ == bestGTguess) {
if (i==0)
myAlleles.add(vc.getReference());
else
myAlleles.add(vc.getAlternateAllele(i-1));
if (j==0)
myAlleles.add(vc.getReference());
else
myAlleles.add(vc.getAlternateAllele(j-1));
done = true;
break;
}
}
if (done)
break;
}
final double qual = GenotypeLikelihoods.getQualFromLikelihoods(bestGTguess, likelihoods);
//System.out.println(myAlleles.toString());
calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
}
for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) {
if ( !genotype.hasLikelihoods() )
continue;
final Genotype g = GLs.get(genotype.getSampleName());
final double[] likelihoods = genotype.getLikelihoods().getAsVector();
if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL)
continue; // regular likelihoods
final double qual = Genotype.NO_LOG10_PERROR;
calls.replace(new Genotype(g.getSampleName(), NO_CALL_ALLELES, qual, null, g.getAttributes(), false));
}
return calls;
}
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
int j = logYMatrix.length - 1;
System.out.printf("-----------------------------------%n");
for (int k=0; k <= numChr; k++) {
double posterior = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
}
}
}

View File

@ -153,6 +153,18 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
public boolean IGNORE_SNP_ALLELES = false;
@Hidden
@Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow multiple alleles in discovery", required = false)
public boolean MULTI_ALLELIC = false;
/**
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
* then this site will be skipped and a warning printed. Note that genotyping sites with many alternate alleles is both CPU and memory intensive.
*/
@Hidden
@Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
public int MAX_ALTERNATE_ALLELES = 5;
// Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value!
public UnifiedArgumentCollection clone() {
@ -176,10 +188,12 @@ public class UnifiedArgumentCollection {
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
uac.alleles = alleles;
uac.MAX_ALTERNATE_ALLELES = MAX_ALTERNATE_ALLELES;
// todo- arguments to remove
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION;
uac.MULTI_ALLELIC = MULTI_ALLELIC;
return uac;
}

View File

@ -59,6 +59,9 @@ public class UnifiedGenotyperEngine {
EMIT_ALL_SITES
}
protected static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
protected static final double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
// the unified argument collection
private final UnifiedArgumentCollection UAC;
public UnifiedArgumentCollection getUAC() { return UAC; }
@ -73,11 +76,12 @@ public class UnifiedGenotyperEngine {
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
private final double[] log10AlleleFrequencyPriorsSNPs;
private final double[] log10AlleleFrequencyPriorsIndels;
private final double[][] log10AlleleFrequencyPriorsSNPs;
private final double[][] log10AlleleFrequencyPriorsIndels;
// the allele frequency likelihoods (allocated once as an optimization)
private ThreadLocal<double[]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[]>();
private ThreadLocal<double[][]> log10AlleleFrequencyLikelihoods = new ThreadLocal<double[][]>();
private ThreadLocal<double[][]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[][]>();
// the priors object
private final GenotypePriors genotypePriorsSNPs;
@ -96,6 +100,7 @@ public class UnifiedGenotyperEngine {
// the standard filter to use for calls below the confidence threshold but above the emit threshold
private static final Set<String> filter = new HashSet<String>(1);
private final GenomeLocParser genomeLocParser;
private final boolean BAQEnabledOnCMDLine;
@ -113,6 +118,7 @@ public class UnifiedGenotyperEngine {
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) {
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser();
this.samples = new TreeSet<String>(samples);
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
this.UAC = UAC.clone();
@ -122,10 +128,10 @@ public class UnifiedGenotyperEngine {
this.annotationEngine = engine;
N = 2 * this.samples.size();
log10AlleleFrequencyPriorsSNPs = new double[N+1];
log10AlleleFrequencyPriorsIndels = new double[N+1];
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, GenotypeLikelihoodsCalculationModel.Model.SNP);
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, GenotypeLikelihoodsCalculationModel.Model.INDEL);
log10AlleleFrequencyPriorsSNPs = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
log10AlleleFrequencyPriorsIndels = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY);
genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP);
genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL);
@ -152,7 +158,6 @@ public class UnifiedGenotyperEngine {
}
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
if ( vc == null )
return null;
@ -290,102 +295,170 @@ public class UnifiedGenotyperEngine {
return new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles).genotypes(genotypes).referenceBaseForIndel(refContext.getBase()).make();
}
// private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine
private VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
return calculateGenotypes(null, null, null, null, vc, model);
}
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
// initialize the data for this thread if that hasn't been done yet
if ( afcm.get() == null ) {
log10AlleleFrequencyPosteriors.set(new double[N+1]);
log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
}
// don't try to genotype too many alternate alleles
if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) {
logger.warn("the Unified Genotyper is currently set to genotype at most " + UAC.MAX_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles; see the --max_alternate_alleles argument");
return null;
}
// estimate our confidence in a reference call and return
if ( vc.getNSamples() == 0 )
if ( vc.getNSamples() == 0 ) {
if ( limitedContext )
return null;
return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ?
estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), false, 1.0) :
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
}
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
clearAFarray(log10AlleleFrequencyLikelihoods.get());
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
// find the most likely frequency
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
// is the most likely frequency conformation AC=0 for all alternate alleles?
boolean bestGuessIsRef = true;
// which alternate allele has the highest MLE AC?
int indexOfHighestAlt = -1;
int alleleCountOfHighestAlt = -1;
// determine which alternate alleles have AF>0
boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()];
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
int indexOfBestAC = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[i]);
// if the most likely AC is not 0, then this is a good alternate allele to use
if ( indexOfBestAC != 0 ) {
altAllelesToUse[i] = true;
bestGuessIsRef = false;
}
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
altAllelesToUse[i] = true;
}
// keep track of the "best" alternate allele to use
if ( indexOfBestAC > alleleCountOfHighestAlt) {
alleleCountOfHighestAlt = indexOfBestAC;
indexOfHighestAlt = i;
}
}
// calculate p(f>0)
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get());
// TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[indexOfHighestAlt]);
double sum = 0.0;
for (int i = 1; i <= N; i++)
sum += normalizedPosteriors[i];
double PofF = Math.min(sum, 1.0); // deal with precision errors
double phredScaledConfidence;
if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0];
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0];
} else {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
if ( Double.isInfinite(phredScaledConfidence) ) {
sum = 0.0;
for (int i = 1; i <= N; i++) {
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
break;
sum += log10AlleleFrequencyPosteriors.get()[i];
sum += log10AlleleFrequencyPosteriors.get()[0][i];
}
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
}
}
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef) ) {
// technically, at this point our confidence in a reference call isn't accurately estimated
// because it didn't take into account samples with no data, so let's get a better estimate
return estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF);
return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF);
}
// strip out any alleles that aren't going to be used in the VariantContext
List<Allele> myAlleles;
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
myAlleles.add(vc.getReference());
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
if ( altAllelesToUse[i] )
myAlleles.add(vc.getAlternateAllele(i));
}
} else {
// use all of the alleles if we are given them by the user
myAlleles = vc.getAlleles();
}
// start constructing the resulting VC
GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
builder.log10PError(phredScaledConfidence/-10.0);
if ( ! passesCallThreshold(phredScaledConfidence) )
builder.filters(filter);
if ( !limitedContext )
builder.referenceBaseForIndel(refContext.getBase());
// create the genotypes
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess);
GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse);
// print out stats if we have a writer
if ( verboseWriter != null )
if ( verboseWriter != null && !limitedContext )
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model);
// *** note that calculating strand bias involves overwriting data structures, so we do that last
HashMap<String, Object> attributes = new HashMap<String, Object>();
// if the site was downsampled, record that fact
if ( rawContext.hasPileupBeenDownsampled() )
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
if ( UAC.COMPUTE_SLOD && bestAFguess != 0 ) {
if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) {
//final boolean DEBUG_SLOD = false;
// the overall lod
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyLikelihoods.get());
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
// the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyLikelihoods.get());
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
clearAFarray(log10AlleleFrequencyLikelihoods.get());
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1);
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
@ -401,26 +474,12 @@ public class UnifiedGenotyperEngine {
attributes.put("SB", strandScore);
}
GenomeLoc loc = refContext.getLocus();
int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc);
Set<Allele> myAlleles = new HashSet<Allele>(vc.getAlleles());
// strip out the alternate allele if it's a ref call
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
myAlleles = new HashSet<Allele>(1);
myAlleles.add(vc.getReference());
}
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles);
// finish constructing the resulting VC
builder.genotypes(genotypes);
builder.log10PError(phredScaledConfidence/-10.0);
if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter);
builder.attributes(attributes);
builder.referenceBaseForIndel(refContext.getBase());
VariantContext vcCall = builder.make();
if ( annotationEngine != null ) {
if ( annotationEngine != null && !limitedContext ) {
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
ReadBackedPileup pileup = null;
if (rawContext.hasExtendedEventPileup())
@ -435,83 +494,6 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
}
// A barebones entry point to the exact model when there is no tracker or stratified contexts available -- only GLs
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenomeLoc loc, final GenotypeLikelihoodsCalculationModel.Model model) {
// initialize the data for this thread if that hasn't been done yet
if ( afcm.get() == null ) {
log10AlleleFrequencyPosteriors.set(new double[N+1]);
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
}
// estimate our confidence in a reference call and return
if ( vc.getNSamples() == 0 )
return null;
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
// find the most likely frequency
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get());
// calculate p(f>0)
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get());
double sum = 0.0;
for (int i = 1; i <= N; i++)
sum += normalizedPosteriors[i];
double PofF = Math.min(sum, 1.0); // deal with precision errors
double phredScaledConfidence;
if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0];
} else {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
if ( Double.isInfinite(phredScaledConfidence) ) {
sum = 0.0;
for (int i = 1; i <= N; i++) {
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
break;
sum += log10AlleleFrequencyPosteriors.get()[i];
}
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
}
}
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) {
// technically, at this point our confidence in a reference call isn't accurately estimated
// because it didn't take into account samples with no data, so let's get a better estimate
return null;
}
// create the genotypes
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess);
// *** note that calculating strand bias involves overwriting data structures, so we do that last
HashMap<String, Object> attributes = new HashMap<String, Object>();
int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc);
Set<Allele> myAlleles = new HashSet<Allele>(vc.getAlleles());
// strip out the alternate allele if it's a ref call
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
myAlleles = new HashSet<Allele>(1);
myAlleles.add(vc.getReference());
}
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles);
builder.genotypes(genotypes);
builder.log10PError(phredScaledConfidence/-10.0);
if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter);
builder.attributes(attributes);
builder.referenceBaseForIndel(vc.getReferenceBaseForIndel());
return new VariantCallContext(builder.make(), confidentlyCalled(phredScaledConfidence, PofF));
}
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
// TODO - temp fix until we can deal with extended events properly
// for indels, stop location is one more than ref allele length
@ -604,9 +586,12 @@ public class UnifiedGenotyperEngine {
return stratifiedContexts;
}
protected static void clearAFarray(double[] AFs) {
for ( int i = 0; i < AFs.length; i++ )
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
protected static void clearAFarray(double[][] AFs) {
for ( int i = 0; i < AFs.length; i++ ) {
for ( int j = 0; j < AFs[i].length; j++ ) {
AFs[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
}
}
}
private final static double[] binomialProbabilityDepthCache = new double[10000];
@ -676,7 +661,7 @@ public class UnifiedGenotyperEngine {
AFline.append(i + "/" + N + "\t");
AFline.append(String.format("%.2f\t", ((float)i)/N));
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
AFline.append("0.00000000\t");
else
AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i]));
@ -689,8 +674,8 @@ public class UnifiedGenotyperEngine {
verboseWriter.println();
}
protected boolean passesEmitThreshold(double conf, int bestAFguess) {
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
protected boolean passesEmitThreshold(double conf, boolean bestGuessIsRef) {
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || !bestGuessIsRef) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
}
protected boolean passesCallThreshold(double conf) {
@ -744,27 +729,25 @@ public class UnifiedGenotyperEngine {
return null;
}
protected void computeAlleleFrequencyPriors(int N, final double[] priors, final GenotypeLikelihoodsCalculationModel.Model model) {
// calculate the allele frequency priors for 1-N
double sum = 0.0;
double heterozygosity;
protected static void computeAlleleFrequencyPriors(final int N, final double[][] priors, final double theta) {
if (model == GenotypeLikelihoodsCalculationModel.Model.INDEL)
heterozygosity = UAC.INDEL_HETEROZYGOSITY;
else
heterozygosity = UAC.heterozygosity;
for (int i = 1; i <= N; i++) {
double value = heterozygosity / (double)i;
priors[i] = Math.log10(value);
sum += value;
// the dimension here is the number of alternate alleles; with e.g. 2 alternate alleles the prior will be theta^2 / i
for (int alleles = 1; alleles <= priors.length; alleles++) {
double sum = 0.0;
// for each i
for (int i = 1; i <= N; i++) {
double value = Math.pow(theta, alleles) / (double)i;
priors[alleles-1][i] = Math.log10(value);
sum += value;
}
// null frequency for AF=0 is (1 - sum(all other frequencies))
priors[alleles-1][0] = Math.log10(1.0 - sum);
}
// null frequency for AF=0 is (1 - sum(all other frequencies))
priors[0] = Math.log10(1.0 - sum);
}
protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
switch( model ) {
case SNP:
return log10AlleleFrequencyPriorsSNPs;
@ -837,4 +820,88 @@ public class UnifiedGenotyperEngine {
return vc;
}
/**
* @param vc variant context with genotype likelihoods
* @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use
*
* @return genotypes
*/
public GenotypesContext assignGenotypes(VariantContext vc,
boolean[] allelesToUse) {
final GenotypesContext GLs = vc.getGenotypes();
final List<String> sampleIndices = GLs.getSampleNamesOrderedByName();
final GenotypesContext calls = GenotypesContext.create();
for ( int k = GLs.size() - 1; k >= 0; k-- ) {
final String sample = sampleIndices.get(k);
final Genotype g = GLs.get(sample);
if ( !g.hasLikelihoods() )
continue;
final double[] likelihoods = g.getLikelihoods().getAsVector();
// if there is no mass on the likelihoods, then just no-call the sample
if ( MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL ) {
calls.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
continue;
}
// genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
// so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD.
final int numAltAlleles = allelesToUse.length;
// start with the assumption that the ideal genotype is homozygous reference
Allele maxAllele1 = vc.getReference(), maxAllele2 = vc.getReference();
double maxLikelihoodSeen = likelihoods[0];
int indexOfMax = 0;
// keep track of some state
Allele firstAllele = vc.getReference();
int subtractor = numAltAlleles + 1;
int subtractionsMade = 0;
for ( int i = 1, PLindex = 1; i < likelihoods.length; i++, PLindex++ ) {
if ( PLindex == subtractor ) {
firstAllele = vc.getAlternateAllele(subtractionsMade);
PLindex -= subtractor;
subtractor--;
subtractionsMade++;
// we can skip this allele if it's not usable
if ( !allelesToUse[subtractionsMade-1] ) {
i += subtractor - 1;
PLindex += subtractor - 1;
continue;
}
}
// we don't care about the entry if we've already seen better
if ( likelihoods[i] <= maxLikelihoodSeen )
continue;
// if it's usable then update the alleles
int alleleIndex = subtractionsMade + PLindex - 1;
if ( allelesToUse[alleleIndex] ) {
maxAllele1 = firstAllele;
maxAllele2 = vc.getAlternateAllele(alleleIndex);
maxLikelihoodSeen = likelihoods[i];
indexOfMax = i;
}
}
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
myAlleles.add(maxAllele1);
myAlleles.add(maxAllele2);
final double qual = GenotypeLikelihoods.getQualFromLikelihoods(indexOfMax, likelihoods);
calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
}
return calls;
}
}

View File

@ -0,0 +1,104 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
public class ExactAFCalculationModelUnitTest extends BaseTest {
static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
static final int numSamples = 3;
static double[][] priors = new double[2][2*numSamples+1]; // flat priors
@BeforeSuite
public void before() {
AA1 = new double[]{0.0, -20.0, -20.0};
AB1 = new double[]{-20.0, 0.0, -20.0};
BB1 = new double[]{-20.0, -20.0, 0.0};
AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0};
AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0};
AC2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0};
BB2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0};
BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0};
CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0};
}
private class GetGLsTest extends TestDataProvider {
GenotypesContext GLs;
int numAltAlleles;
String name;
private GetGLsTest(String name, int numAltAlleles, Genotype... arg) {
super(GetGLsTest.class, name);
GLs = GenotypesContext.create(arg);
this.name = name;
this.numAltAlleles = numAltAlleles;
}
public String toString() {
return String.format("%s input=%s", super.toString(), GLs);
}
}
private static Genotype createGenotype(String name, double[] gls) {
return new Genotype(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), Genotype.NO_LOG10_PERROR, gls);
}
@DataProvider(name = "getGLs")
public Object[][] createGLsData() {
// bi-allelic case
new GetGLsTest("B0", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AA3", AA1));
new GetGLsTest("B1", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AB", AB1));
new GetGLsTest("B2", 1, createGenotype("AA1", AA1), createGenotype("BB", BB1), createGenotype("AA2", AA1));
new GetGLsTest("B3a", 1, createGenotype("AB", AB1), createGenotype("AA", AA1), createGenotype("BB", BB1));
new GetGLsTest("B3b", 1, createGenotype("AB1", AB1), createGenotype("AB2", AB1), createGenotype("AB3", AB1));
new GetGLsTest("B4", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("AA", AA1));
new GetGLsTest("B5", 1, createGenotype("BB1", BB1), createGenotype("AB", AB1), createGenotype("BB2", BB1));
new GetGLsTest("B6", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("BB3", BB1));
// tri-allelic case
new GetGLsTest("B1C0", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AB", AB2));
new GetGLsTest("B0C1", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AC", AC2));
new GetGLsTest("B1C1a", 2, createGenotype("AA", AA2), createGenotype("AB", AB2), createGenotype("AC", AC2));
new GetGLsTest("B1C1b", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("BC", BC2));
new GetGLsTest("B2C1", 2, createGenotype("AB1", AB2), createGenotype("AB2", AB2), createGenotype("AC", AC2));
new GetGLsTest("B3C2a", 2, createGenotype("AB", AB2), createGenotype("BC1", BC2), createGenotype("BC2", BC2));
new GetGLsTest("B3C2b", 2, createGenotype("AB", AB2), createGenotype("BB", BB2), createGenotype("CC", CC2));
return GetGLsTest.getTests(GetGLsTest.class);
}
@Test(dataProvider = "getGLs")
public void testGLs(GetGLsTest cfg) {
final double[][] log10AlleleFrequencyLikelihoods = new double[2][2*numSamples+1];
final double[][] log10AlleleFrequencyPosteriors = new double[2][2*numSamples+1];
for ( int i = 0; i < 2; i++ ) {
for ( int j = 0; j < 2*numSamples+1; j++ ) {
log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
}
}
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
int calculatedAlleleCount = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors[allele]);
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
}
}
}

View File

@ -7,7 +7,6 @@ import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// ********************************************************************************** //
@ -29,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("286f0de92e4ce57986ba861390c6019d"));
Arrays.asList("a2d3839c4ebb390b0012d495e4e53b3a"));
executeTest("test MultiSample Pilot1", spec);
}
@ -45,7 +44,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("d0593483e85a7d815f4c5ee6db284d2a"));
Arrays.asList("43e7a17d95b1a0cf72e669657794d802"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -53,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("3ccce5d909f8f128e496f6841836e5f7"));
Arrays.asList("ae29b9c9aacce8046dc780430540cd62"));
executeTest("test SingleSample Pilot2", spec);
}
@ -63,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "890143b366050e78d6c6ba6b2c6b6864";
private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65";
@Test
public void testCompressedOutput() {
@ -84,7 +83,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "95614280c565ad90f8c000376fef822c";
String md5 = "32a34362dff51d8b73a3335048516d82";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -165,8 +164,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "46243ecc2b9dc716f48ea280c9bb7e72" );
e.put( 1.0 / 1850, "6b2a59dbc76984db6d4d6d6b5ee5d62c" );
e.put( 0.01, "2cb2544739e01f6c08fd820112914317" );
e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -276,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("6e182a58472ea17c8b0eb01f80562fbd"));
Arrays.asList("45633d905136c86e9d3f90ce613255e5"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}
@ -286,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
Arrays.asList("f93f8a35b47bcf96594ada55e2312c73"));
Arrays.asList("98a4d1e1e0a363ba37518563ac6cbead"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
@ -295,8 +294,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("9be28cb208d8b0314d2bc2696e2fd8d4"));
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
Arrays.asList("915e7a3e7cbfd995dbc41fdd382d0d51"));
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
}
@Test

View File

@ -0,0 +1,18 @@
package org.broadinstitute.sting.utils.clipreads;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/29/11
* Time: 4:53 PM
* To change this template use File | Settings | File Templates.
*/
public class CigarStringTestPair {
public String toTest;
public String expected;
public CigarStringTestPair(String ToTest, String Expected) {
this.toTest = ToTest;
this.expected = Expected;
}
}

View File

@ -0,0 +1,185 @@
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.TextCigarCodec;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/27/11
* Time: 6:45 AM
* To change this template use File | Settings | File Templates.
*/
public class ClipReadsTestUtils {
//Should contain all the utils needed for tests to mass produce
//reads, cigars, and other needed classes
final static String BASES = "ACTG";
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) {
// Because quals to char start at 33 for visibility
baseQuals = subtractToArray(baseQuals, 33);
Assert.assertEquals(read.getReadBases(), readBases);
Assert.assertEquals(read.getBaseQualities(), baseQuals);
Assert.assertEquals(read.getCigarString(), cigar);
}
public static void testCigar(GATKSAMRecord read, String cigar) {
Assert.assertEquals(read.getCigarString(), cigar);
}
public static void testBaseQual(GATKSAMRecord read, byte[] readBases, byte[] baseQuals) {
// Because quals to chars start at 33 for visibility
baseQuals = subtractToArray(baseQuals, 33);
if (readBases.length > 0 && baseQuals.length > 0) {
Assert.assertEquals(read.getReadBases(), readBases);
Assert.assertEquals(read.getBaseQualities(), baseQuals);
} else
Assert.assertTrue(read.isEmpty());
}
private static byte[] subtractToArray(byte[] array, int n) {
if (array == null)
return null;
byte[] output = new byte[array.length];
for (int i = 0; i < array.length; i++)
output[i] = (byte) (array[i] - n);
return output;
}
// What the test read looks like
// Ref: 10 11 12 13 14 15 16 17
// Read: 0 1 2 3 - - - -
// --------------------------------
// Bases: A C T G - - - -
// Quals: ! + 5 ? - - - -
public static GATKSAMRecord makeRead() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, BASES.length());
output.setReadBases(new String(BASES).getBytes());
output.setBaseQualityString(new String(QUALS));
return output;
}
public static GATKSAMRecord makeReadFromCigar(Cigar cigar) {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, cigar.getReadLength());
output.setReadBases(cycleString(BASES, cigar.getReadLength()).getBytes());
output.setBaseQualityString(cycleString(QUALS, cigar.getReadLength()));
output.setCigar(cigar);
return output;
}
private static String cycleString(String string, int length) {
String output = "";
int cycles = (length / string.length()) + 1;
for (int i = 1; i < cycles; i++)
output += string;
for (int j = 0; output.length() < length; j++)
output += string.charAt(j % string.length());
return output;
}
public static Set<Cigar> generateCigars() {
// This function generates every permutation of cigar strings we need.
LinkedHashSet<Cigar> output = new LinkedHashSet<Cigar>();
List<Cigar> clippingOptionsStart = new LinkedList<Cigar>();
clippingOptionsStart.add(new Cigar());
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H1S"));
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1S"));
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H"));
LinkedList<Cigar> clippingOptionsEnd = new LinkedList<Cigar>();
clippingOptionsEnd.add(new Cigar());
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S1H"));
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S"));
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1H"));
LinkedList<Cigar> indelOptions1 = new LinkedList<Cigar>();
indelOptions1.add(new Cigar());
//indelOptions1.add( TextCigarCodec.getSingleton().decode("1I1D"));
//indelOptions1.add( TextCigarCodec.getSingleton().decode("1D1I") );
indelOptions1.add(TextCigarCodec.getSingleton().decode("1I"));
indelOptions1.add(TextCigarCodec.getSingleton().decode("1D"));
LinkedList<Cigar> indelOptions2 = new LinkedList<Cigar>();
indelOptions2.add(new Cigar());
indelOptions2.add(TextCigarCodec.getSingleton().decode("1I"));
indelOptions2.add(null);
// Start With M as base CigarElements, M,
LinkedList<Cigar> base = new LinkedList<Cigar>();
base.add(TextCigarCodec.getSingleton().decode("1M"));
base.add(TextCigarCodec.getSingleton().decode("5M"));
base.add(TextCigarCodec.getSingleton().decode("25M"));
// Should indel be added as a base?
// Nested loops W00t!
for (Cigar Base : base) {
for (Cigar indelStart : indelOptions1) {
for (Cigar indelEnd : indelOptions2) {
for (Cigar clipStart : clippingOptionsStart) {
for (Cigar clipEnd : clippingOptionsEnd) {
// Create a list of Cigar Elements and construct Cigar
List<CigarElement> CigarBuilder = new ArrayList<CigarElement>();
// add starting clipping (H/S)
CigarBuilder.addAll(clipStart.getCigarElements());
// add first base (M)
CigarBuilder.addAll(Base.getCigarElements());
// add first indel
CigarBuilder.addAll(indelStart.getCigarElements());
// add second base (M)
CigarBuilder.addAll(Base.getCigarElements());
// add another indel or nothing (M)
if (indelEnd != null)
CigarBuilder.addAll(indelEnd.getCigarElements());
// add final clipping (S/H)
CigarBuilder.addAll(clipEnd.getCigarElements());
output.add(new Cigar(removeConsecutiveElements(CigarBuilder)));
}
}
}
}
}
return output;
}
private static List<CigarElement> removeConsecutiveElements(List<CigarElement> cigarBuilder) {
LinkedList<CigarElement> output = new LinkedList<CigarElement>();
for (CigarElement E : cigarBuilder) {
if (output.isEmpty() || output.getLast().getOperator() != E.getOperator())
output.add(E);
}
return output;
}
}

View File

@ -0,0 +1,69 @@
package org.broadinstitute.sting.utils.clipreads;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/27/11
* Time: 5:17 AM
* To change this template use File | Settings | File Templates.
*/
public class ClippingOpUnitTest extends BaseTest {
ClippingOp clippingOp;
GATKSAMRecord read;
@BeforeTest
public void init() {
read = ClipReadsTestUtils.makeRead();
}
@Test
private void testHardClip() {
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start
testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end
for (TestParameter p : testList) {
init();
clippingOp = new ClippingOp(p.inputStart, p.inputStop);
logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar);
ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
}
@Test
private void testSoftClip() {
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start
testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end
testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start
testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end
testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start
testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end
for (TestParameter p : testList) {
init();
clippingOp = new ClippingOp(p.inputStart, p.inputStop);
logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar);
ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read),
ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar);
}
}
}

View File

@ -25,12 +25,16 @@
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.TextCigarCodec;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.*;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
@ -44,188 +48,207 @@ import java.util.List;
*/
public class ReadClipperUnitTest extends BaseTest {
// TODO: Add error messages on failed tests
// TODO: exception testing, make cases that should fail will fail
//int debug = 0;
// TODO: add indels to all test cases
GATKSAMRecord read, expected;
ReadClipper readClipper;
final static String BASES = "ACTG";
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
public void testIfEqual( GATKSAMRecord read, byte[] readBases, String baseQuals, String cigar) {
Assert.assertEquals(read.getReadBases(), readBases);
Assert.assertEquals(read.getBaseQualityString(), baseQuals);
Assert.assertEquals(read.getCigarString(), cigar);
}
public class testParameter {
int inputStart;
int inputStop;
int substringStart;
int substringStop;
String cigar;
public testParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) {
inputStart = InputStart;
inputStop = InputStop;
substringStart = SubstringStart;
substringStop = SubstringStop;
cigar = Cigar;
}
}
// What the test read looks like
// Ref: 1 2 3 4 5 6 7 8
// Read: 0 1 2 3 - - - -
// -----------------------------
// Bases: A C T G - - - -
// Quals: ! + 5 ? - - - -
@BeforeMethod
public void init() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
read.setReadBases(new String(BASES).getBytes());
read.setBaseQualityString(new String(QUALS));
readClipper = new ReadClipper(read);
//logger.warn(read.getCigarString());
readClipper = new ReadClipper(ClipReadsTestUtils.makeRead());
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipBothEndsByReferenceCoordinates() {
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
//int debug = 1;
//Clip whole read
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(1,1), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(10, 10), new GATKSAMRecord(readClipper.read.getHeader()));
//clip 1 base
expected = readClipper.hardClipBothEndsByReferenceCoordinates(1,4);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3));
Assert.assertEquals(expected.getCigarString(), "1H2M1H");
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipBothEndsByReferenceCoordinates(10, 13),
ClipReadsTestUtils.BASES.substring(1, 3).getBytes(), ClipReadsTestUtils.QUALS.substring(1, 3).getBytes(),
"1H2M1H");
List<CigarStringTestPair> cigarStringTestPairs = new LinkedList<CigarStringTestPair>();
cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I99M1H"));
//cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1H"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1H"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1H"));
for (CigarStringTestPair pair : cigarStringTestPairs) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest)));
ClipReadsTestUtils.testCigar(readClipper.hardClipBothEndsByReferenceCoordinates(
ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read),
ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read)),
pair.expected);
}
/*
for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) {
// The read has to be long enough to clip one base from each side
// This also filters a lot of cigars
if ( cigar.getReadLength() > 26 ) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar ));
System.out.println( "Testing Cigar: "+cigar.toString() ) ;
//cigar length reference plus soft clip
ClipReadsTestUtils.testBaseQual(
readClipper.hardClipBothEndsByReferenceCoordinates(
ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read),
ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read) ),
readClipper.read.getReadString().substring(1, (cigar.getReadLength() - 1)).getBytes(),
readClipper.read.getBaseQualityString().substring(1, (cigar.getReadLength() - 1)).getBytes());
}
}
*/
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipByReadCoordinates() {
logger.warn("Executing testHardClipByReadCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0, 3), new GATKSAMRecord(readClipper.read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(0,0,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(3,3,0,3,"3M1H"));//clip 1 base at end
testList.add(new testParameter(0,1,2,4,"2H2M"));//clip 2 bases at start
testList.add(new testParameter(2,3,0,2,"2M2H"));//clip 2 bases at end
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
for ( testParameter p : testList ) {
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipByReferenceCoordinates() {
logger.warn("Executing testHardClipByReferenceCoordinates");
//logger.warn(debug);
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(10, 13), new GATKSAMRecord(readClipper.read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(-1,1,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(4,-1,0,3,"3M1H"));//clip 1 base at end
testList.add(new testParameter(-1,2,2,4,"2H2M"));//clip 2 bases at start
testList.add(new testParameter(3,-1,0,2,"2M2H"));//clip 2 bases at end
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(-1, 10, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(13, -1, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(-1, 11, 2, 4, "2H2M"));//clip 2 bases at start
testList.add(new TestParameter(12, -1, 0, 2, "2M2H"));//clip 2 bases at end
for ( testParameter p : testList ) {
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReferenceCoordinates(p.inputStart,p.inputStop),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinates(p.inputStart, p.inputStop),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
List<CigarStringTestPair> cigarStringTestPairs = new LinkedList<CigarStringTestPair>();
cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I100M"));
//cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1M"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1S"));
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1S"));
//Clips only first base
for (CigarStringTestPair pair : cigarStringTestPairs) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest)));
ClipReadsTestUtils.testCigar(readClipper.hardClipByReadCoordinates(0, 0), pair.expected);
}
/*
for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) {
// The read has to be long enough to clip one base
// This also filters a lot of cigars
if ( cigar.getReadLength() > 26 ) {
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar ));
System.out.println( "Testing Cigar: "+cigar.toString() ) ;
//cigar length reference plus soft clip
// Clip first read
ClipReadsTestUtils.testBaseQual(
readClipper.hardClipByReadCoordinates(0,0),
readClipper.read.getReadString().substring(1, cigar.getReadLength()).getBytes(),
readClipper.read.getBaseQualityString().substring(1, cigar.getReadLength()).getBytes());
}
}
*/
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipByReferenceCoordinatesLeftTail() {
init();
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(13), new GATKSAMRecord(readClipper.read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new testParameter(2, -1, 2, 4, "2H2M"));//clip 2 bases at start
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(10, -1, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
for ( testParameter p : testList ) {
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
}
@Test ( enabled = true )
@Test(enabled = true)
public void testHardClipByReferenceCoordinatesRightTail() {
init();
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(10), new GATKSAMRecord(readClipper.read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(-1, 4, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new testParameter(-1, 3, 0, 2, "2M2H"));//clip 2 bases at end
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(-1, 13, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new TestParameter(-1, 12, 0, 2, "2M2H"));//clip 2 bases at end
for ( testParameter p : testList ) {
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
}
@Test ( enabled = true ) // TODO This function is returning null reads
@Test(enabled = true)
public void testHardClipLowQualEnds() {
// Needs a thorough redesign
logger.warn("Executing testHardClipByReferenceCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte) 64), new GATKSAMRecord(readClipper.read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(1,-1,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(11,-1,2,4,"2H2M"));//clip 2 bases at start
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
for ( testParameter p : testList ) {
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
/* todo find a better way to test lowqual tail clipping on both sides
/* todo find a better way to test lowqual tail clipping on both sides
// Reverse Quals sequence
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
@ -237,7 +260,7 @@ public class ReadClipperUnitTest extends BaseTest {
init();
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
testBaseQualCigar( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
@ -245,15 +268,77 @@ public class ReadClipperUnitTest extends BaseTest {
*/
}
public class CigarReadMaker {
}
@Test ( enabled = false )
@Test(enabled = false)
public void testHardClipSoftClippedBases() {
// Generate a list of cigars to test
for (Cigar cigar : ClipReadsTestUtils.generateCigars()) {
//logger.warn("Testing Cigar: "+cigar.toString());
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(cigar));
int clipStart = 0;
int clipEnd = 0;
boolean expectEmptyRead = false;
List<CigarElement> cigarElements = cigar.getCigarElements();
int CigarListLength = cigarElements.size();
// It will know what needs to be clipped based on the start and end of the string, hardclips and softclips
// are added to the amount to clip
if (cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP) {
//clipStart += cigarElements.get(0).getLength();
if (cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP) {
clipStart += cigarElements.get(1).getLength();
// Check for leading indel
if (cigarElements.get(2).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
}
// Check for leading indel
else if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
} else if (cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP) {
clipStart += cigarElements.get(0).getLength();
// Check for leading indel
if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
}
//Check for leading indel
else if (cigarElements.get(0).getOperator() == CigarOperator.INSERTION) {
expectEmptyRead = true;
}
if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP) {
//clipEnd += cigarElements.get(CigarListLength - 1).getLength();
if (cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP)
clipEnd += cigarElements.get(CigarListLength - 2).getLength();
} else if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP)
clipEnd += cigarElements.get(CigarListLength - 1).getLength();
String readBases = readClipper.read.getReadString();
String baseQuals = readClipper.read.getBaseQualityString();
// "*" is the default empty-sequence-string and for our test it needs to be changed to ""
if (readBases.equals("*"))
readBases = "";
if (baseQuals.equals("*"))
baseQuals = "";
logger.warn(String.format("Testing cigar %s, expecting Base: %s and Qual: %s",
cigar.toString(), readBases.substring(clipStart, readBases.length() - clipEnd),
baseQuals.substring(clipStart, baseQuals.length() - clipEnd)));
//if (expectEmptyRead)
// testBaseQual( readClipper.hardClipSoftClippedBases(), new byte[0], new byte[0] );
//else
ClipReadsTestUtils.testBaseQual(readClipper.hardClipSoftClippedBases(),
readBases.substring(clipStart, readBases.length() - clipEnd).getBytes(),
baseQuals.substring(clipStart, baseQuals.length() - clipEnd).getBytes());
logger.warn("Cigar: " + cigar.toString() + " PASSED!");
}
// We will use testParameter in the following way
// Right tail, left tail,
}
}

View File

@ -0,0 +1,24 @@
package org.broadinstitute.sting.utils.clipreads;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/28/11
* Time: 4:07 PM
* To change this template use File | Settings | File Templates.
*/
public class TestParameter {
int inputStart;
int inputStop;
int substringStart;
int substringStop;
String cigar;
public TestParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) {
inputStart = InputStart;
inputStop = InputStop;
substringStart = SubstringStart;
substringStop = SubstringStop;
cigar = Cigar;
}
}

View File

@ -21,9 +21,6 @@ class PacbioProcessingPipeline extends QScript {
@Input(doc="input FASTA/FASTQ/BAM file - or list of FASTA/FASTQ/BAM files. ", shortName="i", required=true)
var input: File = _
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true)
var R: String = _
@Input(doc="Reference fasta file", shortName="R", required=true)
var reference: File = _
@ -180,7 +177,6 @@ class PacbioProcessingPipeline extends QScript {
}
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
this.resources = R
this.recal_file = inRecalFile
this.output_dir = outPath
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"

View File

@ -0,0 +1,67 @@
package org.broadinstitute.sting.queue.pipeline
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.testng.annotations.Test
import org.broadinstitute.sting.BaseTest
class DataProcessingPipelineTest {
@Test
def testSimpleBAM {
val projectName = "test1"
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
val spec = new PipelineTestSpec
spec.name = "DataProcessingPipeline"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala",
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
" -i " + BaseTest.testDir + "exampleBAM.bam",
" -D " + BaseTest.testDir + "exampleDBSNP.vcf",
" -nv ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "69ba216bcf1e2dd9b6bd631ef99efda9"
PipelineTest.executeTest(spec)
}
@Test
def testBWAPEBAM {
val projectName = "test2"
val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
val spec = new PipelineTestSpec
spec.name = "DataProcessingPipeline"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala",
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
" -i " + BaseTest.testDir + "exampleBAM.bam",
" -D " + BaseTest.testDir + "exampleDBSNP.vcf",
" -nv ",
" -bwa /home/unix/carneiro/bin/bwa",
" -bwape ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "3134cbeae1561ff8e6b559241f9ed7f5"
PipelineTest.executeTest(spec)
}
}

View File

@ -0,0 +1,45 @@
package org.broadinstitute.sting.queue.pipeline
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.testng.annotations.Test
import org.broadinstitute.sting.BaseTest
class PacbioProcessingPipelineTest {
@Test
def testBAM {
val testOut = "exampleBAM.recal.bam"
val spec = new PipelineTestSpec
spec.name = "pacbioProcessingPipeline"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala",
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
" -i " + BaseTest.testDir + "exampleBAM.bam",
" -blasr ",
" -D " + BaseTest.testDir + "exampleDBSNP.vcf").mkString
spec.fileMD5s += testOut -> "91a88b51d00cec40596d6061aa0c9938"
PipelineTest.executeTest(spec)
}
}

Binary file not shown.

Binary file not shown.

282
public/testdata/exampleDBSNP.vcf vendored 100644
View File

@ -0,0 +1,282 @@
##fileformat=VCFv4.1
##FILTER=<ID=NC,Description="Inconsistent Genotype Submission For At Least One Sample">
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=ASP,Number=0,Type=Flag,Description="Is Assembly specific. This is set if the variant only maps to one assembly">
##INFO=<ID=ASS,Number=0,Type=Flag,Description="In acceptor splice site FxnCode = 73">
##INFO=<ID=CDA,Number=0,Type=Flag,Description="Variation is interrogated in a clinical diagnostic assay">
##INFO=<ID=CFL,Number=0,Type=Flag,Description="Has Assembly conflict. This is for weight 1 and 2 variant that maps to different chromosomes on different assemblies.">
##INFO=<ID=CLN,Number=0,Type=Flag,Description="Variant is Clinical(LSDB,OMIM,TPA,Diagnostic)">
##INFO=<ID=DSS,Number=0,Type=Flag,Description="In donor splice-site FxnCode = 75">
##INFO=<ID=G5,Number=0,Type=Flag,Description=">5% minor allele frequency in 1+ populations">
##INFO=<ID=G5A,Number=0,Type=Flag,Description=">5% minor allele frequency in each and all populations">
##INFO=<ID=GCF,Number=0,Type=Flag,Description="Has Genotype Conflict Same (rs, ind), different genotype. N/N is not included.">
##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id. The gene symbol and id are delimited by a colon (:) and each pair is delimited by a vertical bar (|)">
##INFO=<ID=GMAF,Number=1,Type=Float,Description="Global Minor Allele Frequency [0, 0.5]; global population is 1000GenomesProject phase 1 genotype data from 629 individuals, released in the 08-04-2010 dataset">
##INFO=<ID=GNO,Number=0,Type=Flag,Description="Genotypes available. The variant has individual genotype (in SubInd table).">
##INFO=<ID=HD,Number=0,Type=Flag,Description="Marker is on high density genotyping kit (50K density or greater). The variant may have phenotype associations present in dbGaP.">
##INFO=<ID=INT,Number=0,Type=Flag,Description="In Intron FxnCode = 6">
##INFO=<ID=KGPROD,Number=0,Type=Flag,Description="1000 Genome production phase">
##INFO=<ID=KGPilot1,Number=0,Type=Flag,Description="1000 Genome discovery(pilot1) 2009">
##INFO=<ID=KGPilot123,Number=0,Type=Flag,Description="1000 Genome discovery all pilots 2010(1,2,3)">
##INFO=<ID=KGVAL,Number=0,Type=Flag,Description="1000 Genome validated by second method">
##INFO=<ID=LSD,Number=0,Type=Flag,Description="Submitted from a locus-specific database">
##INFO=<ID=MTP,Number=0,Type=Flag,Description="Microattribution/third-party annotation(TPA:GWAS,PAGE)">
##INFO=<ID=MUT,Number=0,Type=Flag,Description="Is mutation (journal citation, explicit fact): a low frequency variation that is cited in journal and other reputable sources">
##INFO=<ID=NOC,Number=0,Type=Flag,Description="Contig allele not present in variant allele list. The reference sequence allele at the mapped position is not present in the variant allele list, adjusted for orientation.">
##INFO=<ID=NOV,Number=0,Type=Flag,Description="Rs cluster has non-overlapping allele sets. True when rs set has more than 2 alleles from different submissions and these sets share no alleles in common.">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=NSF,Number=0,Type=Flag,Description="Has non-synonymous frameshift A coding region variation where one allele in the set changes all downstream amino acids. FxnClass = 44">
##INFO=<ID=NSM,Number=0,Type=Flag,Description="Has non-synonymous missense A coding region variation where one allele in the set changes protein peptide. FxnClass = 42">
##INFO=<ID=NSN,Number=0,Type=Flag,Description="Has non-synonymous nonsense A coding region variation where one allele in the set changes to STOP codon (TER). FxnClass = 41">
##INFO=<ID=OM,Number=0,Type=Flag,Description="Has OMIM/OMIA">
##INFO=<ID=OTH,Number=0,Type=Flag,Description="Has other variant with exactly the same set of mapped positions on NCBI refernce assembly.">
##INFO=<ID=PH1,Number=0,Type=Flag,Description="Phase 1 genotyped: filtered, non-redundant">
##INFO=<ID=PH2,Number=0,Type=Flag,Description="Phase 2 genotyped: filtered, non-redundant">
##INFO=<ID=PH3,Number=0,Type=Flag,Description="Phase 3 genotyped: filtered, non-redundant">
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
##INFO=<ID=PMC,Number=0,Type=Flag,Description="Links exist to PubMed Central article">
##INFO=<ID=R3,Number=0,Type=Flag,Description="In 3' gene region FxnCode = 13">
##INFO=<ID=R5,Number=0,Type=Flag,Description="In 5' gene region FxnCode = 15">
##INFO=<ID=REF,Number=0,Type=Flag,Description="Has reference A coding region variation where one allele in the set is identical to the reference sequence. FxnCode = 8">
##INFO=<ID=RSPOS,Number=1,Type=Integer,Description="Chr position reported in dbSNP">
##INFO=<ID=RV,Number=0,Type=Flag,Description="RS orientation is reversed">
##INFO=<ID=S3D,Number=0,Type=Flag,Description="Has 3D structure - SNP3D table">
##INFO=<ID=SAO,Number=1,Type=Integer,Description="Variant Allele Origin: 0 - unspecified, 1 - Germline, 2 - Somatic, 3 - Both">
##INFO=<ID=SCS,Number=1,Type=Integer,Description="Variant Clinical Significance, 0 - unknown, 1 - untested, 2 - non-pathogenic, 3 - probable-non-pathogenic, 4 - probable-pathogenic, 5 - pathogenic, 6 - drug-response, 7 - histocompatibility, 255 - other">
##INFO=<ID=SLO,Number=0,Type=Flag,Description="Has SubmitterLinkOut - From SNP->SubSNP->Batch.link_out">
##INFO=<ID=SSR,Number=1,Type=Integer,Description="Variant Suspect Reason Code, 0 - unspecified, 1 - Paralog, 2 - byEST, 3 - Para_EST, 4 - oldAlign, 5 - other">
##INFO=<ID=SYN,Number=0,Type=Flag,Description="Has synonymous A coding region variation where one allele in the set does not change the encoded amino acid. FxnCode = 3">
##INFO=<ID=TPA,Number=0,Type=Flag,Description="Provisional Third Party Annotation(TPA) (currently rs from PHARMGKB who will give phenotype data)">
##INFO=<ID=U3,Number=0,Type=Flag,Description="In 3' UTR Location is in an untranslated region (UTR). FxnCode = 53">
##INFO=<ID=U5,Number=0,Type=Flag,Description="In 5' UTR Location is in an untranslated region (UTR). FxnCode = 55">
##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class">
##INFO=<ID=VLD,Number=0,Type=Flag,Description="Is Validated. This bit is set if the variant has 2+ minor allele count based on frequency or genotype data.">
##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property">
##INFO=<ID=WGT,Number=1,Type=Integer,Description="Weight, 00 - unmapped, 1 - weight 1, 2 - weight 2, 3 - weight 3 or more">
##INFO=<ID=WTD,Number=0,Type=Flag,Description="Is Withdrawn by submitter If one member ss is withdrawn by submitter, then this bit is set. If all member ss' are withdrawn, then the rs is deleted to SNPHistory">
##INFO=<ID=dbSNPBuildID,Number=1,Type=Integer,Description="First dbSNP Build for RS">
##LeftAlignVariants="analysis_type=LeftAlignVariants input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=00-All.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub filter_mismatching_base_and_quals=false"
##contig=<ID=chr1,length=249250621,assembly=b37>
##phasing=partial
##reference=GRCh37.3
##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta
##source=dbSNP
##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 10144 rs144773400 TA T . PASS ASP;RSPOS=10145;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10228 rs143255646 TA T . PASS ASP;RSPOS=10229;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10234 rs145599635 C T . PASS ASP;RSPOS=10234;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10248 rs148908337 A T . PASS ASP;RSPOS=10248;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10254 rs140194106 TA T . PASS ASP;RSPOS=10255;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10291 rs145427775 C T . PASS ASP;RSPOS=10291;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 10327 rs112750067 T C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10327;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 10329 rs150969722 AC A . PASS ASP;RSPOS=10330;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10351 rs145072688 CTA C,CA . PASS ASP;RSPOS=10352;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10382 rs147093981 AAC A,AC . PASS ASP;RSPOS=10383;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10433 rs56289060 A AC . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10433;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 10439 rs112766696 AC A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10440;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=132
chr1 10439 rs138941843 AC A . PASS ASP;RSPOS=10440;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134
chr1 10440 rs112155239 C A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10440;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 10492 rs55998931 C T . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.0617001828153565;RSPOS=10492;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040000000100;WGT=0;dbSNPBuildID=129
chr1 10519 rs62636508 G C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10519;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
chr1 10583 rs58108140 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.270566727605119;KGPilot123;RSPOS=10583;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=129
chr1 10611 rs189107123 C G . PASS KGPilot123;RSPOS=10611;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 10828 rs10218492 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10828;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
chr1 10904 rs10218493 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10904;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=119
chr1 10927 rs10218527 A G . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10927;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119
chr1 10938 rs28853987 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10938;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11014 rs28484712 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11014;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11022 rs28775022 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11022;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125
chr1 11081 rs10218495 G T . PASS CFL;GENEINFO=LOC100652771:100652771;GNO;RSPOS=11081;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=119
chr1 11863 rs187669455 C A . PASS RSPOS=11863;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
chr1 13302 rs180734498 C T . PASS KGPilot123;RSPOS=13302;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 13327 rs144762171 G C . PASS ASP;KGPilot123;RSPOS=13327;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 13684 rs71260404 C T . PASS GENEINFO=LOC100652771:100652771;GNO;RSPOS=13684;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 13980 rs151276478 T C . PASS ASP;KGPilot123;RSPOS=13980;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 14889 rs142444908 G A . PASS ASP;RSPOS=14889;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 14907 rs79585140 A G . PASS GNO;RSPOS=14907;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 14930 rs75454623 A G . PASS GNO;RSPOS=14930;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 14976 rs71252251 G A . PASS ASP;GNO;RSPOS=14976;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
chr1 15061 rs71268703 T TG . PASS ASP;GNO;RSPOS=15061;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 15118 rs71252250 A G . PASS ASP;GNO;RSPOS=15118;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130
chr1 15211 rs144718396 T G . PASS ASP;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 15211 rs78601809 T G . PASS ASP;GNO;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 16257 rs78588380 G C . PASS ASP;GNO;RSPOS=16257;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 16378 rs148220436 T C . PASS ASP;RSPOS=16378;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 16495 rs141130360 G C . PASS ASP;RSPOS=16495;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 16497 rs150723783 A G . PASS ASP;RSPOS=16497;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 17519 rs192890528 G T . PASS RSPOS=17519;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135
chr1 19226 rs138930629 T A . PASS ASP;RSPOS=19226;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 20141 rs56336884 G A . PASS HD;RSPOS=20141;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000400000100;WGT=0;dbSNPBuildID=129
chr1 20144 rs143346096 G A . PASS ASP;RSPOS=20144;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 20206 rs71262675 C T . PASS GNO;RSPOS=20206;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 20245 rs71262674 G A . PASS GMAF=0.256398537477148;GNO;RSPOS=20245;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 20304 rs71262673 G C . PASS GMAF=0.338208409506399;GNO;RSPOS=20304;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130
chr1 26999 rs147506580 A G . PASS ASP;RSPOS=26999;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 29436 rs2462493 G A . PASS GNO;RSPOS=29436;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=100
chr1 30923 rs140337953 G T . PASS ASP;KGPilot123;RSPOS=30923;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 33487 rs77459554 C T . PASS ASP;GNO;RSPOS=33487;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 33495 rs75468675 C T . PASS ASP;GNO;RSPOS=33495;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33505 rs75627161 T C . PASS ASP;GNO;RSPOS=33505;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33508 rs75609629 A T . PASS ASP;GNO;RSPOS=33508;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131
chr1 33521 rs76098219 T A . PASS GNO;RSPOS=33521;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131
chr1 33593 rs557585 G A . PASS RSPOS=33593;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=83
chr1 33648 rs62028204 G T . PASS RSPOS=33648;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
chr1 33656 rs113821789 T C . PASS RSPOS=33656;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=132
chr1 51476 rs187298206 T C . PASS KGPilot123;RSPOS=51476;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51479 rs116400033 T A . PASS ASP;G5;G5A;GMAF=0.113802559414991;KGPilot123;RSPOS=51479;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070010000100;WGT=0;dbSNPBuildID=132
chr1 51803 rs62637812 T C . PASS GMAF=0.468921389396709;RSPOS=51803;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040000000100;WGT=0;dbSNPBuildID=129
chr1 51898 rs76402894 C A . PASS GMAF=0.0731261425959781;GNO;RSPOS=51898;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
chr1 51914 rs190452223 T G . PASS KGPilot123;RSPOS=51914;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51928 rs78732933 G A . PASS GNO;RSPOS=51928;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131
chr1 51935 rs181754315 C T . PASS KGPilot123;RSPOS=51935;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 51954 rs185832753 G C . PASS KGPilot123;RSPOS=51954;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 52058 rs62637813 G C . PASS GMAF=0.0342778793418647;KGPilot123;RSPOS=52058;SAO=0;SSR=1;VC=SNV;VLD;VP=050000000000040010000140;WGT=0;dbSNPBuildID=129
chr1 52144 rs190291950 T A . PASS KGPilot123;RSPOS=52144;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 52238 rs150021059 T G . PASS ASP;KGPilot123;RSPOS=52238;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54353 rs140052487 C A . PASS ASP;KGPilot123;RSPOS=54353;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54421 rs146477069 A G . PASS ASP;KGPilot123;RSPOS=54421;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54490 rs141149254 G A . PASS ASP;KGPilot123;RSPOS=54490;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54676 rs2462492 C T . PASS ASP;GMAF=0.191956124314442;GNO;HD;KGPilot123;RSPOS=54676;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040510000100;WGT=0;dbSNPBuildID=100
chr1 54753 rs143174675 T G . PASS ASP;KGPilot123;RSPOS=54753;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 54788 rs59861892 CC C,CCT . PASS ASP;RSPOS=54789;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 54795 rs58014817 T A . PASS ASP;RSPOS=54795;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129
chr1 55164 rs3091274 C A . PASS G5;G5A;GMAF=0.145338208409506;GNO;KGPilot123;RSPOS=55164;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030110000100;WGT=0;dbSNPBuildID=103
chr1 55299 rs10399749 C T . PASS G5;G5A;GMAF=0.278793418647166;GNO;KGPilot123;PH2;RSPOS=55299;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030112000100;WGT=0;dbSNPBuildID=119
chr1 55302 rs3091273 C T . PASS RSPOS=55302;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
chr1 55313 rs182462964 A T . PASS KGPilot123;RSPOS=55313;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55322 rs3107974 C T . PASS RSPOS=55322;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103
chr1 55326 rs3107975 T C . PASS GNO;HD;KGPilot123;RSPOS=55326;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=103
chr1 55330 rs185215913 G A . PASS KGPilot123;RSPOS=55330;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55367 rs190850374 G A . PASS KGPilot123;RSPOS=55367;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55388 rs182711216 C T . PASS KGPilot123;RSPOS=55388;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55394 rs2949420 T A . PASS GNO;KGPilot123;PH2;RSPOS=55394;SAO=0;SSR=0;VC=SNV;VP=050000000000000112000100;WGT=0;dbSNPBuildID=101
chr1 55416 rs193242050 G A . PASS KGPilot123;RSPOS=55416;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55427 rs183189405 T C . PASS KGPilot123;RSPOS=55427;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55545 rs28396308 C T . PASS GNO;RSPOS=55545;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=125
chr1 55816 rs187434873 G A . PASS KGPilot123;RSPOS=55816;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55850 rs191890754 C G . PASS KGPilot123;RSPOS=55850;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 55852 rs184233019 G C . PASS KGPilot123;RSPOS=55852;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 56644 rs143342222 A C . PASS ASP;KGPilot123;RSPOS=56644;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 57952 rs189727433 A C . PASS KGPilot123;RSPOS=57952;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 58771 rs140128481 T C . PASS ASP;RSPOS=58771;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134
chr1 58814 rs114420996 G A . PASS ASP;G5;GMAF=0.0982632541133455;KGPilot123;RSPOS=58814;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
chr1 59040 rs149755937 T C . PASS ASP;KGPilot123;RSPOS=59040;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 60718 rs78395614 G A . PASS CFL;GNO;RSPOS=60718;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 60726 rs192328835 C A . PASS KGPilot123;RSPOS=60726;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 60791 rs76199781 A G . PASS CFL;GNO;RSPOS=60791;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61442 rs74970982 A G . PASS CFL;GMAF=0.076782449725777;GNO;KGPilot123;RSPOS=61442;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61462 rs56992750 T A . PASS CFL;G5;G5A;GMAF=0.0383912248628885;GNO;KGPilot123;RSPOS=61462;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008030110000100;WGT=0;dbSNPBuildID=129
chr1 61480 rs75526266 G C . PASS CFL;GNO;RSPOS=61480;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61499 rs75719746 G A . PASS CFL;GNO;RSPOS=61499;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 61743 rs184286948 G C . PASS KGPilot123;RSPOS=61743;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 61920 rs62637820 G A . PASS CFL;GMAF=0.0255941499085923;RSPOS=61920;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=129
chr1 61987 rs76735897 A G . PASS CFL;GMAF=0.292961608775137;GNO;KGPilot123;RSPOS=61987;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61989 rs77573425 G C . PASS CFL;GMAF=0.309414990859232;GNO;KGPilot123;RSPOS=61989;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131
chr1 61993 rs190553843 C T . PASS KGPilot123;RSPOS=61993;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 62156 rs181864839 C T . PASS KGPilot123;RSPOS=62156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 62157 rs10399597 G A . PASS CFL;GMAF=0.00228519195612431;KGPilot123;RSPOS=62157;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=119
chr1 62162 rs140556834 G A . PASS ASP;KGPilot123;RSPOS=62162;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 62203 rs28402963 T C . PASS CFL;KGPilot123;RSPOS=62203;SAO=0;SSR=0;VC=SNV;VP=050000000008000010000100;WGT=0;dbSNPBuildID=125
chr1 62271 rs28599927 A G . PASS CFL;GMAF=0.138482632541133;RSPOS=62271;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=125
chr1 63268 rs75478250 T C . PASS CFL;GNO;RSPOS=63268;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131
chr1 63276 rs185977555 G A . PASS KGPilot123;RSPOS=63276;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 63297 rs188886746 G A . PASS KGPilot123;RSPOS=63297;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 63671 rs116440577 G A . PASS ASP;G5;GMAF=0.170018281535649;KGPilot123;RSPOS=63671;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132
chr1 63737 rs77426996 TACT T,TCTA . PASS CFL;RSPOS=63738;SAO=0;SSR=0;VC=DIV;VP=050000000008000000000200;WGT=0;dbSNPBuildID=131
chr1 64649 rs181431124 A C . PASS KGPilot123;RSPOS=64649;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66008 rs2691286 C G . PASS CFL;GNO;RSPOS=66008;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=100
chr1 66162 rs62639105 A T . PASS CFL;GMAF=0.320383912248629;GNO;KGPilot123;RSPOS=66162;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000110000100;WGT=0;dbSNPBuildID=129
chr1 66176 rs28552463 T A . PASS CFL;GMAF=0.0484460694698355;KGPilot123;RSPOS=66176;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=125
chr1 66219 rs181028663 A T . PASS KGPilot123;RSPOS=66219;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66238 rs113961546 T A . PASS CFL;GNO;RSPOS=66238;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=132
chr1 66314 rs28534012 T A . PASS CFL;RSPOS=66314;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
chr1 66331 rs186063952 A C . PASS KGPilot123;RSPOS=66331;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66334 rs28464214 T A . PASS CFL;RSPOS=66334;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125
chr1 66442 rs192044252 T A . PASS KGPilot123;RSPOS=66442;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 66457 rs13328655 T A . PASS CFL;GMAF=0.0795246800731261;KGPilot123;RSPOS=66457;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=121
chr1 66503 rs112350669 T A . PASS CFL;RSPOS=66503;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=132
chr1 66507 rs12401368 T A . PASS CFL;GMAF=0.479890310786106;KGPilot123;RSPOS=66507;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=120
chr1 66651 rs2257270 A T . PASS CFL;GNO;RSPOS=66651;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=100
chr1 67179 rs149952626 C G . PASS ASP;KGPilot123;RSPOS=67179;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 67181 rs77662731 A G . PASS ASP;G5;G5A;GENEINFO=OR4F5:79501;GMAF=0.0470749542961609;GNO;KGPilot123;RSPOS=67181;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070110000100;WGT=0;dbSNPBuildID=131
chr1 67223 rs78676975 C A . PASS ASP;GENEINFO=OR4F5:79501;GNO;RSPOS=67223;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131
chr1 69428 rs140739101 T G . PASS ASP;RSPOS=69428;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69453 rs142004627 G A . PASS ASP;RSPOS=69453;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 69476 rs148502021 T C . PASS ASP;RSPOS=69476;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69496 rs150690004 G A . PASS ASP;RSPOS=69496;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134
chr1 69511 rs75062661 A G . PASS GENEINFO=OR4F5:79501;GMAF=0.193784277879342;GNO;KGPilot123;RSPOS=69511;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000110000100;WGT=0;dbSNPBuildID=131
chr1 69534 rs190717287 T C . PASS KGPilot123;RSPOS=69534;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000010000100;WGT=0;dbSNPBuildID=135
chr1 69552 rs55874132 G C . PASS GENEINFO=OR4F5:79501;HD;RSPOS=69552;S3D;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050300000000040400000100;WGT=0;dbSNPBuildID=129
chr1 69590 rs141776804 T A . PASS ASP;RSPOS=69590;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 69594 rs144967600 T C . PASS ASP;RSPOS=69594;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134
chr1 72148 rs182862337 C T . PASS KGPilot123;RSPOS=72148;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 73841 rs143773730 C T . PASS ASP;KGPilot123;RSPOS=73841;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 74651 rs62641291 G A . PASS RSPOS=74651;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129
chr1 74681 rs13328683 G T . PASS CFL;GMAF=0.286106032906764;RSPOS=74681;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=121
chr1 74709 rs62641292 T A . PASS CFL;RSPOS=74709;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=129
chr1 74771 rs13328675 A G . PASS CFL;RSPOS=74771;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 74790 rs13328700 C G . PASS CFL;RSPOS=74790;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 74792 rs13328684 G A . PASS CFL;RSPOS=74792;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121
chr1 77462 rs188023513 G A . PASS KGPilot123;RSPOS=77462;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77470 rs192898053 T C . PASS KGPilot123;RSPOS=77470;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77874 rs184538873 G A . PASS KGPilot123;RSPOS=77874;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 77961 rs78385339 G A . PASS GMAF=0.125685557586837;KGPilot123;RSPOS=77961;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040010000100;WGT=0;dbSNPBuildID=131
chr1 79033 rs62641298 A G . PASS GMAF=0.438299817184644;GNO;HD;KGPilot123;RSPOS=79033;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=129
chr1 79050 rs62641299 G T . PASS GMAF=0.224405850091408;GNO;KGPilot123;RSPOS=79050;SAO=0;SSR=0;VC=SNV;VP=050000000000000110000100;WGT=0;dbSNPBuildID=129
chr1 79137 rs143777184 A T . PASS ASP;KGPilot123;RSPOS=79137;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 79417 rs184768190 C T . PASS KGPilot123;RSPOS=79417;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 79418 rs2691296 G C . PASS GMAF=0.0178244972577697;RSPOS=79418;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=100
chr1 79538 rs2691295 C T . PASS RSPOS=79538;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=100
chr1 79772 rs147215883 C G . PASS ASP;KGPilot123;RSPOS=79772;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 79872 rs189224661 T G . PASS KGPilot123;RSPOS=79872;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 80323 rs3942603 G C . PASS CFL;GNO;RSPOS=80323;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=108
chr1 80386 rs3878915 C A . PASS GMAF=0.0118829981718464;RSPOS=80386;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=108
chr1 80454 rs144226842 G C . PASS ASP;KGPilot123;RSPOS=80454;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 81836 rs2259560 A T . PASS ASP;GNO;RSPOS=81836;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=100
chr1 81949 rs181567186 T C . PASS KGPilot123;RSPOS=81949;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 81962 rs4030308 T TAA . PASS ASP;RSPOS=81962;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 82102 rs4030307 C T . PASS ASP;RSPOS=82102;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82103 rs2020400 T C . PASS ASP;RSPOS=82103;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
chr1 82126 rs1815133 C T . PASS ASP;RSPOS=82126;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92
chr1 82133 rs4030306 CA C,CAAAAAAAAAAAAAAA . PASS ASP;RSPOS=82136;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 82154 rs4477212 A G . PASS ASP;HD;RSPOS=82154;SAO=0;SSR=0;VC=SNV;VP=050000000004000400000100;WGT=0;dbSNPBuildID=111
chr1 82162 rs1815132 C A . PASS ASP;GMAF=0.0351919561243144;GNO;RSPOS=82162;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=92
chr1 82163 rs139113303 G A . PASS ASP;KGPilot123;RSPOS=82163;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 82196 rs112844054 A T . PASS ASP;RSPOS=82196;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132
chr1 82249 rs1851945 A G . PASS ASP;GMAF=0.0452468007312614;KGPilot123;RSPOS=82249;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=92
chr1 82282 rs3871775 G A . PASS ASP;RSPOS=82282;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82303 rs3871776 T C . PASS ASP;RSPOS=82303;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108
chr1 82316 rs4030305 A C . PASS ASP;GNO;RSPOS=82316;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=108
chr1 82609 rs149189449 C G . PASS ASP;KGPilot123;RSPOS=82609;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134
chr1 82676 rs185237834 T G . PASS KGPilot123;RSPOS=82676;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 82734 rs4030331 T C . PASS ASP;GMAF=0.261882998171846;KGPilot123;RSPOS=82734;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=108
chr1 82957 rs189774606 C T . PASS KGPilot123;RSPOS=82957;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83084 rs181193408 T A . PASS KGPilot123;RSPOS=83084;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83088 rs186081601 G C . PASS KGPilot123;RSPOS=83088;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83107 rs4405097 G C . PASS ASP;RSPOS=83107;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=111
chr1 83119 rs4030324 AA A,ATAAC . PASS ASP;RSPOS=83120;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108
chr1 83771 rs189906733 T G . PASS KGPilot123;RSPOS=83771;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 83786 rs58520670 T TA . PASS ASP;RSPOS=83794;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83815 rs58857344 GAGAA G . PASS ASP;RSPOS=83827;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83826 rs71281475 AAAGA A,AAA . PASS ASP;GNO;RSPOS=83827;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 83855 rs59596480 GAA G . PASS ASP;RSPOS=83857;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83872 rs59556914 AA A,AAGA . PASS ASP;RSPOS=83873;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83884 rs59586754 GAAA G . PASS ASP;RSPOS=83885;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83897 rs61330047 GAA G . PASS ASP;RSPOS=83899;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83901 rs58254183 GAAAGAA G . PASS ASP;RSPOS=83903;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83921 rs61338823 GAA G . PASS ASP;RSPOS=83923;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83930 rs71281474 AG A,AGA . PASS ASP;GNO;RSPOS=83931;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130
chr1 83934 rs59235392 AG A,AGAAA . PASS ASP;RSPOS=83935;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 83977 rs180759811 A G . PASS KGPilot123;RSPOS=83977;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84002 rs28850140 G A . PASS ASP;GMAF=0.138939670932358;KGPilot123;RSPOS=84002;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=125
chr1 84010 rs186443818 G A . PASS KGPilot123;RSPOS=84010;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84018 rs61352176 GAA G . PASS ASP;RSPOS=84020;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129
chr1 84079 rs190867312 T C . PASS KGPilot123;RSPOS=84079;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84139 rs183605470 A T . PASS KGPilot123;RSPOS=84139;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84156 rs188652299 A C . PASS KGPilot123;RSPOS=84156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84244 rs191297051 A C . PASS KGPilot123;RSPOS=84244;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84295 rs183209871 G A . PASS KGPilot123;RSPOS=84295;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84346 rs187855973 T C . PASS KGPilot123;RSPOS=84346;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84453 rs191379015 C G . PASS KGPilot123;RSPOS=84453;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135
chr1 84705 rs183470350 T G . PASS KGPilot123;RSPOS=84705;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135

View File

@ -0,0 +1 @@
100000 1 0

View File

@ -0,0 +1,3 @@
100000 1 11
0 chr1 (null)
0 100000 0

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.